{
"type": "SET_VALUE",
"ref": "/apps/knowledge/topics/courses/direct-preference-optimization-your-language-model--dpo-direct-preference-optimization/.info",
"value": {
"title": "Direct Preference Optimization: Your Language Model is Secretly a Reward Model",
"description": "DPO introduces a simple classification loss that directly optimizes language model policies on human preference data, eliminating the need for reinforcement learning while maintaining theoretical equivalence to the RLHF objective.",
"created_at": 1776520419441,
"created_by": "0xD330170b4E4585E6b824C4E194FD2630BA5dEF40"
}
}