{
"type": "SET_VALUE",
"ref": "/apps/knowledge/topics/courses/direct-preference-optimization-your-language-model--dpo-direct-preference-optimization/.info",
"value": {
"title": "Direct Preference Optimization: Your Language Model is Secretly a Reward Model —",
"description": "DPO introduces a simple classification loss that directly optimizes language model policies on human preference data, eliminating the need for reinforcement learning while maintaining theoretical equivalence to the RLHF objective.",
"created_at": 1775461229829,
"created_by": "0x86fF4A93E15d6DD4B82146eE655eB01dc8Fc9962"
}
}