{"id":"https://openalex.org/W7133517480","doi":"https://doi.org/10.48550/arxiv.2603.02701","title":"Graph-GRPO: Stabilizing Multi-Agent Topology Learning via Group Relative Policy Optimization","display_name":"Graph-GRPO: Stabilizing Multi-Agent Topology Learning via Group Relative Policy Optimization","publication_year":2026,"publication_date":"2026-03-03","ids":{"openalex":"https://openalex.org/W7133517480","doi":"https://doi.org/10.48550/arxiv.2603.02701"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.02701","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.02701","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.02701","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087209190","display_name":"Yueyang Cang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Cang, Yueyang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088962332","display_name":"Xiaoteng Zhang","orcid":"https://orcid.org/0009-0002-2271-7857"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Xiaoteng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126439664","display_name":"Erlu Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Erlu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072090101","display_name":"Zehua Ji","orcid":"https://orcid.org/0000-0003-2861-228X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ji, Zehua","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128073158","display_name":"Yuhang Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Yuhang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128120138","display_name":"Yuchen He (5057375)","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Yuchen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128045026","display_name":"Zhiyuan Ning","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ning, Zhiyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128126859","display_name":"Chen Yijun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yijun, Chen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108873061","display_name":"Wenge Que","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Que, Wenge","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128073920","display_name":"Li Shi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Li","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5087209190"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.6273000240325928,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.6273000240325928,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.09000000357627869,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.041999999433755875,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/variance","display_name":"Variance (accounting)","score":0.6869000196456909},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.6067000031471252},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6003999710083008},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5651000142097473},{"id":"https://openalex.org/keywords/binary-number","display_name":"Binary number","score":0.5073000192642212},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.507099986076355},{"id":"https://openalex.org/keywords/topology","display_name":"Topology (electrical circuits)","score":0.4519999921321869}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.704800009727478},{"id":"https://openalex.org/C196083921","wikidata":"https://www.wikidata.org/wiki/Q7915758","display_name":"Variance (accounting)","level":2,"score":0.6869000196456909},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.6067000031471252},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6003999710083008},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5651000142097473},{"id":"https://openalex.org/C48372109","wikidata":"https://www.wikidata.org/wiki/Q3913","display_name":"Binary number","level":2,"score":0.5073000192642212},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.507099986076355},{"id":"https://openalex.org/C184720557","wikidata":"https://www.wikidata.org/wiki/Q7825049","display_name":"Topology (electrical circuits)","level":2,"score":0.4519999921321869},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.42260000109672546},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.4212000072002411},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.4047999978065491},{"id":"https://openalex.org/C2781311116","wikidata":"https://www.wikidata.org/wiki/Q83306","display_name":"Group (periodic table)","level":2,"score":0.36419999599456787},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.36390000581741333},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34779998660087585},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.34549999237060547},{"id":"https://openalex.org/C199845137","wikidata":"https://www.wikidata.org/wiki/Q145490","display_name":"Network topology","level":2,"score":0.3154999911785126},{"id":"https://openalex.org/C77553402","wikidata":"https://www.wikidata.org/wiki/Q13222579","display_name":"Upper and lower bounds","level":2,"score":0.3057999908924103},{"id":"https://openalex.org/C66905080","wikidata":"https://www.wikidata.org/wiki/Q17005494","display_name":"Binary classification","level":3,"score":0.3052999973297119},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.2808000147342682},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2800999879837036},{"id":"https://openalex.org/C44871818","wikidata":"https://www.wikidata.org/wiki/Q5154139","display_name":"Communication in small groups","level":2,"score":0.26330000162124634}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.02701","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.02701","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.02701","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.02701","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Optimizing":[0],"communication":[1,107,172],"topology":[2,84,98],"is":[3],"fundamental":[4],"to":[5,24],"the":[6,50,114,125,131,138],"efficiency":[7],"and":[8,49,112,145,154,169],"effectiveness":[9],"of":[10,94,105,116],"Large":[11],"Language":[12],"Model":[13],"(LLM)-based":[14],"Multi-Agent":[15],"Systems":[16],"(MAS).":[17],"While":[18],"recent":[19],"approaches":[20],"utilize":[21],"reinforcement":[22],"learning":[23,73],"dynamically":[25],"construct":[26],"task-specific":[27],"graphs,":[28],"they":[29],"typically":[30],"rely":[31],"on":[32,120,152],"single-sample":[33],"policy":[34],"gradients":[35],"with":[36],"absolute":[37],"rewards":[38,59,129],"(e.g.,":[39],"binary":[40],"correctness).":[41],"This":[42],"paradigm":[43],"suffers":[44],"from":[45,141],"severe":[46],"gradient":[47],"variance":[48,144],"credit":[51,148],"assignment":[52],"problem:":[53],"simple":[54],"queries":[55,65],"yield":[56],"non-informative":[57],"positive":[58],"for":[60,109],"suboptimal":[61],"structures,":[62],"while":[63],"difficult":[64],"often":[66],"result":[67],"in":[68,99],"failures":[69],"that":[70,87,159],"provide":[71],"no":[72],"signal.":[74],"To":[75],"address":[76],"these":[77],"challenges,":[78],"we":[79],"propose":[80],"Graph-GRPO,":[81],"a":[82,96,103],"novel":[83],"optimization":[85],"framework":[86],"integrates":[88],"Group":[89],"Relative":[90],"Policy":[91],"Optimization.":[92],"Instead":[93],"evaluating":[95],"single":[97],"isolation,":[100],"Graph-GRPO":[101,160],"samples":[102],"group":[104],"diverse":[106],"graphs":[108],"each":[110],"query":[111],"computes":[113],"advantage":[115],"specific":[117],"edges":[118],"based":[119],"their":[121],"relative":[122],"performance":[123],"within":[124],"group.":[126],"By":[127],"normalizing":[128],"across":[130],"sampled":[132],"group,":[133],"our":[134],"method":[135],"effectively":[136],"mitigates":[137],"noise":[139],"derived":[140],"task":[142],"difficulty":[143],"enables":[146],"fine-grained":[147],"assignment.":[149],"Extensive":[150],"experiments":[151],"reasoning":[153],"code":[155],"generation":[156],"benchmarks":[157],"demonstrate":[158],"significantly":[161],"outperforms":[162],"state-of-the-art":[163],"baselines,":[164],"achieving":[165],"superior":[166],"training":[167],"stability":[168],"identifying":[170],"critical":[171],"pathways":[173],"previously":[174],"obscured":[175],"by":[176],"reward":[177],"noise.":[178]},"counts_by_year":[],"updated_date":"2026-03-05T07:36:02.291473","created_date":"2026-03-05T00:00:00"}
