{
"type": "SET_VALUE",
"ref": "/apps/knowledge/topics/courses/direct-preference-optimization-your-language-model--dpo-direct-preference-optimization/.info",
"value": {
"title": "Direct Preference Optimization: Your Language Model is Secretly a Reward Model —",
"description": "DPO introduces a simple classification loss that directly optimizes language model policies on human preference data, eliminating the need for reinforcement learning while maintaining theoretical equivalence to the RLHF objective.",
"created_at": 1778136772851,
"created_by": "0x91f99a2B95D96C99585BBb28A4D8EbBBe7892a70"
}
}