{
"type": "SET_VALUE",
"ref": "/apps/knowledge/topics/courses/direct-preference-optimization-your-language-model--dpo-direct-preference-optimization/.info",
"value": {
"title": "Direct Preference Optimization: Your Language Model is Secretly a Reward Model — (DPO Direct Preference Optimization)",
"description": "DPO introduces a simple classification loss that directly optimizes language model policies on human preference data, eliminating the need for reinforcement learning while maintaining theoretical equivalence to the RLHF objective.",
"created_at": 1773709186129,
"created_by": "0x2a52d97BD3a21e8Ca6ac6b5A06705B82496C9e77"
}
}