{"id":"https://openalex.org/W7151407413","doi":"https://doi.org/10.48550/arxiv.2604.03873","title":"SODA: Semi On-Policy Black-Box Distillation for Large Language Models","display_name":"SODA: Semi On-Policy Black-Box Distillation for Large Language Models","publication_year":2026,"publication_date":"2026-04-04","ids":{"openalex":"https://openalex.org/W7151407413","doi":"https://doi.org/10.48550/arxiv.2604.03873"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.03873","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.03873","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.03873","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133082085","display_name":"Xiwen Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chen, Xiwen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133140721","display_name":"Jingjing Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Jingjing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133103136","display_name":"Wenhui Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Wenhui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058590658","display_name":"Peijie Qiu","orcid":"https://orcid.org/0000-0002-1591-5436"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qiu, Peijie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133086368","display_name":"Xuanzhao Dong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dong, Xuanzhao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027724386","display_name":"Hejian Sang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sang, Hejian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133113191","display_name":"Zhipeng Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Zhipeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059864597","display_name":"Alborz Geramifard","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Geramifard, Alborz","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133130342","display_name":"Feng Luo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Feng","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5133082085"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.436599999666214,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.436599999666214,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.09239999949932098,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.05460000038146973,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.8417999744415283},{"id":"https://openalex.org/keywords/distillation","display_name":"Distillation","score":0.5676000118255615},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.4837000072002411},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4733999967575073},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.46779999136924744},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4196999967098236},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.4124999940395355},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.37880000472068787}],"concepts":[{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.8417999744415283},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7714999914169312},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.5676000118255615},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.4837000072002411},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4733999967575073},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.46779999136924744},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44209998846054077},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4196999967098236},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.4124999940395355},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.39570000767707825},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.37880000472068787},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.36559998989105225},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.3564000129699707},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3409000039100647},{"id":"https://openalex.org/C14103023","wikidata":"https://www.wikidata.org/wiki/Q11681459","display_name":"Pairing","level":3,"score":0.32179999351501465},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.30809998512268066},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3066999912261963},{"id":"https://openalex.org/C2779231336","wikidata":"https://www.wikidata.org/wiki/Q7534724","display_name":"Sketch","level":2,"score":0.30239999294281006},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.29750001430511475},{"id":"https://openalex.org/C55282118","wikidata":"https://www.wikidata.org/wiki/Q252683","display_name":"Snapshot (computer storage)","level":2,"score":0.29280000925064087},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.2892000079154968},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.27549999952316284}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.03873","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.03873","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.03873","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.03873","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Black-box":[0],"knowledge":[1,16],"distillation":[2,181],"for":[3,132,139],"large":[4],"language":[5],"models":[6,155],"presents":[7],"a":[8,58,77,96,109],"strict":[9],"trade-off.":[10],"Simple":[11],"off-policy":[12],"methods":[13,27,167],"(e.g.,":[14,28],"sequence-level":[15],"distillation)":[17],"struggle":[18],"to":[19,88,124],"correct":[20],"the":[21,64,89,104,114,121,137,165],"student's":[22,115],"inherent":[23,65],"errors.":[24],"Fully":[25],"on-policy":[26,159],"Generative":[29],"Adversarial":[30],"Distillation)":[31],"solve":[32],"this":[33,48,157,179],"via":[34],"adversarial":[35,145,197],"training":[36,40,184],"but":[37],"introduce":[38],"well-known":[39],"instability":[41],"and":[42,71,143,153,194],"crippling":[43],"computational":[44],"overhead.":[45],"To":[46],"address":[47],"dilemma,":[49],"we":[50,93],"propose":[51],"SODA":[52,161],"(Semi":[53],"On-policy":[54],"Distillation":[55],"with":[56,108],"Alignment),":[57],"highly":[59,97],"efficient":[60],"alternative":[61],"motivated":[62],"by":[63,102],"capability":[66],"gap":[67],"between":[68],"frontier":[69],"teachers":[70],"much":[72],"smaller":[73],"base":[74],"models.":[75],"Because":[76],"compact":[78,151],"student":[79,123],"model's":[80],"natural,":[81],"zero-shot":[82],"responses":[83],"are":[84],"almost":[85],"strictly":[86],"inferior":[87,128],"powerful":[90],"teacher's":[91,105],"targets,":[92],"can":[94],"construct":[95],"effective":[98],"contrastive":[99],"signal":[100],"simply":[101],"pairing":[103],"optimal":[106],"response":[107],"one-time":[110],"static":[111,127],"snapshot":[112],"of":[113,171],"outputs.":[116],"This":[117],"demonstrates":[118],"that":[119],"exposing":[120],"small":[122],"its":[125],"own":[126],"behaviors":[129],"is":[130],"sufficient":[131],"high-quality":[133],"distribution":[134],"alignment,":[135],"eliminating":[136,196],"need":[138],"costly":[140],"dynamic":[141],"rollouts":[142],"fragile":[144],"balancing.":[146],"Extensive":[147],"evaluations":[148],"across":[149],"four":[150],"Qwen2.5":[152],"Llama-3":[154],"validate":[156],"semi":[158],"paradigm.":[160],"matches":[162],"or":[163],"outperforms":[164],"state-of-the-art":[166],"on":[168],"15":[169],"out":[170],"16":[172],"benchmark":[173],"results.":[174],"More":[175],"importantly,":[176],"it":[177],"achieves":[178],"superior":[180],"quality":[182],"while":[183],"10":[185],"times":[186],"faster,":[187],"consuming":[188],"27%":[189],"less":[190],"peak":[191],"GPU":[192],"memory,":[193],"completely":[195],"instability.":[198]},"counts_by_year":[],"updated_date":"2026-04-08T06:07:18.267832","created_date":"2026-04-08T00:00:00"}
