{"id":"https://openalex.org/W7154625367","doi":"https://doi.org/10.48550/arxiv.2604.14054","title":"$\u03c0$-Play: Multi-Agent Self-Play via Privileged Self-Distillation without External Data","display_name":"$\u03c0$-Play: Multi-Agent Self-Play via Privileged Self-Distillation without External Data","publication_year":2026,"publication_date":"2026-04-15","ids":{"openalex":"https://openalex.org/W7154625367","doi":"https://doi.org/10.48550/arxiv.2604.14054"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.14054","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.14054","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.14054","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133749434","display_name":"Yaocheng Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yaocheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080193690","display_name":"Yuanheng Zhu","orcid":"https://orcid.org/0000-0001-5384-423X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Yuanheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133766009","display_name":"Wenyue Chong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chong, Wenyue","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132565521","display_name":"Songjun Tu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tu, Songjun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133747952","display_name":"Qichao Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Qichao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133783387","display_name":"Jiajun Chai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chai, Jiajun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133776342","display_name":"Xiaohan Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Xiaohan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133740833","display_name":"Wei Lin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lin, Wei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133749479","display_name":"Guojun Yin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yin, Guojun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133826052","display_name":"Dongbin Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Dongbin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.18520000576972961,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.18520000576972961,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.12229999899864197,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.08839999884366989,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7002999782562256},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.678600013256073},{"id":"https://openalex.org/keywords/artifact","display_name":"Artifact (error)","score":0.5253000259399414},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5242000222206116},{"id":"https://openalex.org/keywords/path","display_name":"Path (computing)","score":0.4578000009059906},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.35019999742507935}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7483999729156494},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7002999782562256},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.678600013256073},{"id":"https://openalex.org/C2779010991","wikidata":"https://www.wikidata.org/wiki/Q2720909","display_name":"Artifact (error)","level":2,"score":0.5253000259399414},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5242000222206116},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4869999885559082},{"id":"https://openalex.org/C2777735758","wikidata":"https://www.wikidata.org/wiki/Q817765","display_name":"Path (computing)","level":2,"score":0.4578000009059906},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3912999927997589},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.35019999742507935},{"id":"https://openalex.org/C148220186","wikidata":"https://www.wikidata.org/wiki/Q7111912","display_name":"Outcome (game theory)","level":2,"score":0.34929999709129333},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3467000126838684},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3434999883174896},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.30649998784065247},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3000999987125397},{"id":"https://openalex.org/C91632574","wikidata":"https://www.wikidata.org/wiki/Q15088675","display_name":"Data curation","level":2,"score":0.2833000123500824},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.25440001487731934},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2542000114917755},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.25279998779296875}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.14054","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.14054","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.14054","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.14054","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.4674735963344574}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Deep":[0],"search":[1,172],"agents":[2,173],"have":[3],"emerged":[4],"as":[5,144],"a":[6,32,63,82,121,140,150,160],"promising":[7],"paradigm":[8],"for":[9,94],"addressing":[10],"complex":[11],"information-seeking":[12],"tasks,":[13],"but":[14,39],"their":[15],"training":[16],"remains":[17],"challenging":[18],"due":[19],"to":[20,35,50,147],"sparse":[21,46],"rewards,":[22,48],"weak":[23],"credit":[24],"assignment,":[25],"and":[26,100,128,139,174],"limited":[27],"labeled":[28],"data.":[29],"Self-play":[30],"offers":[31],"scalable":[33],"route":[34],"reduce":[36],"data":[37],"dependence,":[38],"conventional":[40,181],"self-play":[41,60,88,127,158],"optimizes":[42],"students":[43],"only":[44],"through":[45],"outcome":[47],"leading":[49],"low":[51,98],"learning":[52],"efficiency.":[53],"In":[54,130],"this":[55,113],"work,":[56],"we":[57,115],"observe":[58],"that":[59,74,166],"naturally":[61],"produces":[62],"question":[64],"construction":[65],"path":[66],"(QCP)":[67],"during":[68],"task":[69],"generation,":[70],"an":[71,132],"intermediate":[72],"artifact":[73],"captures":[75],"the":[76,95],"reverse":[77],"solution":[78],"process.":[79],"This":[80,154],"reveals":[81],"new":[83],"source":[84],"of":[85],"privileged":[86,92,110,145],"information:":[87],"can":[89],"provide":[90],"high-quality":[91],"information":[93],"self-distillation":[96],"at":[97,101,186],"cost":[99],"scale,":[102],"without":[103],"relying":[104],"on":[105],"human":[106],"feedback":[107],"or":[108],"curated":[109],"information.":[111],"Leveraging":[112],"insight,":[114],"propose":[116],"Privileged":[117],"Information":[118],"Self-Play":[119],"($\u03c0$-Play),":[120],"novel":[122],"multi-agent":[123],"self-evolution":[124],"framework":[125],"combining":[126],"self-distillation.":[129,153],"$\u03c0$-Play,":[131],"examiner":[133],"generates":[134],"tasks":[135],"together":[136],"with":[137],"QCPs,":[138],"teacher":[141],"employs":[142],"QCP":[143],"context":[146],"densely":[148],"supervise":[149],"student":[151],"via":[152],"design":[155],"transforms":[156],"sparse-reward":[157],"into":[159],"dense-feedback":[161],"co-evolution.":[162],"Extensive":[163],"experiments":[164],"show":[165],"data-free":[167],"$\u03c0$-Play":[168],"surpasses":[169],"fully":[170],"supervised":[171],"improves":[175],"evolutionary":[176],"efficiency":[177],"by":[178],"2-3$\\times$":[179],"over":[180],"self-play.":[182],"Code":[183],"is":[184],"available":[185],"https://github.com/zhyaoch/pi-play.":[187]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-17T00:00:00"}
