{"id":"https://openalex.org/W7160248635","doi":"https://doi.org/10.48550/arxiv.2605.01347","title":"MAD-OPD: Breaking the Ceiling in On-Policy Distillation via Multi-Agent Debate","display_name":"MAD-OPD: Breaking the Ceiling in On-Policy Distillation via Multi-Agent Debate","publication_year":2026,"publication_date":"2026-05-02","ids":{"openalex":"https://openalex.org/W7160248635","doi":"https://doi.org/10.48550/arxiv.2605.01347"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.01347","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.01347","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.01347","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5135382073","display_name":"Jianze Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Jianze","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135315225","display_name":"Ying Liu","orcid":"https://orcid.org/0000-0003-3890-5414"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Ying","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135291100","display_name":"Jinlong Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Jinlong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020416386","display_name":"\u80e1\u65ed\u7eaf","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Xuchun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135286837","display_name":"Qilong Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Qilong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135325553","display_name":"Yu Cao","orcid":"https://orcid.org/0009-0005-1800-6755"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cao, Yu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135392827","display_name":"Jun Wang","orcid":"https://orcid.org/0000-0002-4291-2134"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Jun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135403964","display_name":"Hua Yang","orcid":"https://orcid.org/0000-0002-5709-3952"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Hua","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135336261","display_name":"Yong Xie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xie, Yong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5086038509","display_name":"Qianglong Chen","orcid":"https://orcid.org/0000-0002-7845-1544"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Qianglong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10533","display_name":"Teaching and Learning Programming","score":0.2304999977350235,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10533","display_name":"Teaching and Learning Programming","score":0.2304999977350235,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.08869999647140503,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.07739999890327454,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/ceiling","display_name":"Ceiling (cloud)","score":0.7064999938011169},{"id":"https://openalex.org/keywords/divergence","display_name":"Divergence (linguistics)","score":0.6169999837875366},{"id":"https://openalex.org/keywords/train","display_name":"Train","score":0.5547999739646912},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.5103999972343445},{"id":"https://openalex.org/keywords/distillation","display_name":"Distillation","score":0.4869000017642975},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.412200003862381}],"concepts":[{"id":"https://openalex.org/C2777489069","wikidata":"https://www.wikidata.org/wiki/Q1589822","display_name":"Ceiling (cloud)","level":2,"score":0.7064999938011169},{"id":"https://openalex.org/C207390915","wikidata":"https://www.wikidata.org/wiki/Q1230525","display_name":"Divergence (linguistics)","level":2,"score":0.6169999837875366},{"id":"https://openalex.org/C190839683","wikidata":"https://www.wikidata.org/wiki/Q2448197","display_name":"Train","level":2,"score":0.5547999739646912},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.5103999972343445},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4885999858379364},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.4869000017642975},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.412200003862381},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.39809998869895935},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3659000098705292},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.35179999470710754},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.31529998779296875},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.3018999993801117},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.30070000886917114},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.29910001158714294},{"id":"https://openalex.org/C45555294","wikidata":"https://www.wikidata.org/wiki/Q28113351","display_name":"Inequality","level":2,"score":0.28529998660087585},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2676999866962433}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.01347","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.01347","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.01347","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.01347","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.4294007122516632}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"On-policy":[0],"distillation":[1,65],"(OPD)":[2],"trains":[3],"a":[4,20,68,127],"student":[5,29],"on":[6,177],"its":[7,97],"own":[8],"trajectories":[9,47],"under":[10,120],"token-level":[11,89],"teacher":[12,26,66],"supervision,":[13,90],"but":[14],"existing":[15],"methods":[16],"are":[17],"capped":[18],"by":[19,62,96,186,192],"single-teacher":[21,197],"capability":[22],"ceiling:":[23],"when":[24],"the":[25,28,31,64,76,80,178,183,189,195],"errs,":[27],"inherits":[30],"error.":[32],"OPD":[33,102],"also":[34,107],"remains":[35],"largely":[36],"unexplored":[37],"in":[38],"agentic":[39,104,136,166,184],"tasks,":[40,105],"where":[41],"per-step":[42],"errors":[43],"compound":[44],"across":[45,173],"long":[46],"and":[48,138,146,151,158,164,167,188],"destabilize":[49],"training.":[50],"We":[51,124],"propose":[52],"MAD-OPD":[53,170],"(Multi-Agent":[54],"Debate-driven":[55],"On-Policy":[56,109],"Distillation),":[57],"which":[58,113],"breaks":[59],"this":[60],"ceiling":[61],"recasting":[63],"as":[67],"deliberative":[69],"collective":[70,85],"of":[71],"teachers":[72],"that":[73,87],"debate":[74,81],"over":[75,194],"student's":[77],"on-policy":[78],"state;":[79],"produces":[82],"an":[83],"emergent":[84],"intelligence":[86],"supplies":[88],"with":[91],"each":[92],"teacher's":[93],"contribution":[94],"weighted":[95],"post-debate":[98],"confidence.":[99],"To":[100],"extend":[101],"to":[103,117],"we":[106],"introduce":[108],"Agentic":[110],"Distillation":[111],"(OPAD),":[112],"adds":[114],"step-level":[115],"sampling":[116],"stabilize":[118],"training":[119],"multi-step":[121],"error":[122],"compounding.":[123],"additionally":[125],"derive":[126],"task-adaptive":[128],"divergence":[129,142],"principle,":[130],"selecting":[131],"JSD":[132],"(Jensen-Shannon":[133],"divergence)":[134],"for":[135,143],"stability":[137],"reverse":[139],"KL":[140],"(Kullback-Leibler)":[141],"code":[144,168,190],"generation,":[145],"verify":[147],"it":[148,181],"both":[149],"theoretically":[150],"empirically.":[152],"Across":[153],"six":[154,175],"teacher-student":[155],"configurations":[156],"(Qwen3":[157],"Qwen3.5;":[159],"1.7B-14B":[160],"students,":[161],"8B-32B":[162],"teachers)":[163],"five":[165],"benchmarks,":[169],"ranks":[171],"first":[172],"all":[174],"configurations;":[176],"14B+8B$\\to$4B":[179],"setting":[180],"lifts":[182],"average":[185,191],"$+2.4\\%$":[187],"$+3.7\\%$":[193],"stronger":[196],"OPD.":[198]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-06T00:00:00"}
