{"id":"https://openalex.org/W7143401131","doi":"https://doi.org/10.48550/arxiv.2603.26458","title":"Can AI Models Direct Each Other? Organizational Structure as a Probe into Training Limitations","display_name":"Can AI Models Direct Each Other? Organizational Structure as a Probe into Training Limitations","publication_year":2026,"publication_date":"2026-03-27","ids":{"openalex":"https://openalex.org/W7143401131","doi":"https://doi.org/10.48550/arxiv.2603.26458"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.26458","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.26458","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.26458","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130965336","display_name":"Rui Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Liu, Rui","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5130965336"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.3562999963760376,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.3562999963760376,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10430","display_name":"Software Engineering Techniques and Practices","score":0.07069999724626541,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.047200001776218414,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.7353000044822693},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.5189999938011169},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.49380001425743103},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4853000044822693},{"id":"https://openalex.org/keywords/trace","display_name":"TRACE (psycholinguistics)","score":0.39739999175071716},{"id":"https://openalex.org/keywords/organizational-structure","display_name":"Organizational structure","score":0.38839998841285706},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.3707999885082245}],"concepts":[{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.7353000044822693},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5322999954223633},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.5189999938011169},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.49380001425743103},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4853000044822693},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42660000920295715},{"id":"https://openalex.org/C75291252","wikidata":"https://www.wikidata.org/wiki/Q1315756","display_name":"TRACE (psycholinguistics)","level":2,"score":0.39739999175071716},{"id":"https://openalex.org/C182566","wikidata":"https://www.wikidata.org/wiki/Q759524","display_name":"Organizational structure","level":2,"score":0.38839998841285706},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.3707999885082245},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.37070000171661377},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.3571999967098236},{"id":"https://openalex.org/C171078966","wikidata":"https://www.wikidata.org/wiki/Q111029","display_name":"Root (linguistics)","level":2,"score":0.3443000018596649},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.3366999924182892},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.3239000141620636},{"id":"https://openalex.org/C48677424","wikidata":"https://www.wikidata.org/wiki/Q6888088","display_name":"Mode (computer interface)","level":2,"score":0.3221000134944916},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.31189998984336853},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.2896000146865845},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.2840000092983246},{"id":"https://openalex.org/C84945661","wikidata":"https://www.wikidata.org/wiki/Q7366567","display_name":"Root cause","level":2,"score":0.2809999883174896},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.26759999990463257},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.25369998812675476}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.26458","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.26458","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.26458","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.26458","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/8","score":0.4161384105682373,"display_name":"Decent work and economic growth"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Can":[0],"an":[1,26],"expensive":[2,27,112,117],"AI":[3],"model":[4,29,46,73,224],"effectively":[5],"direct":[6],"a":[7,22,43,88,92,97,103,120,124,142,194],"cheap":[8,44],"one":[9],"to":[10,193,226,244,249],"solve":[11],"software":[12],"engineering":[13],"tasks?":[14],"We":[15,54],"study":[16],"this":[17,221],"question":[18],"by":[19,218],"introducing":[20],"ManagerWorker,":[21],"two-agent":[23],"pipeline":[24,70,216],"where":[25],"\"manager\"":[28],"(text-only,":[30],"no":[31],"code":[32,52],"execution)":[33],"analyzes":[34],"issues,":[35],"dispatches":[36],"exploration":[37,172],"tasks,":[38],"and":[39,72,81,173,188,205,240,256],"reviews":[40],"implementations,":[41],"while":[42,170],"\"worker\"":[45],"(with":[47],"full":[48],"repo":[49],"access)":[50],"executes":[51],"changes.":[53],"evaluate":[55],"on":[56],"200":[57],"instances":[58],"from":[59,262],"SWE-bench":[60],"Lite":[61],"across":[62],"five":[63],"configurations":[64],"that":[65,111,137,178],"vary":[66],"the":[67,79,82,106,131,138,152,168,184,233,238],"manager-worker":[68],"relationship,":[69],"complexity,":[71],"pairing.":[74],"Our":[75],"findings":[76],"reveal":[77],"both":[78],"promise":[80],"limits":[83],"of":[84,105],"multi-agent":[85],"direction:":[86],"(1)":[87],"strong":[89,98],"manager":[90,122],"directing":[91,123,139],"weak":[93,121,125,132],"worker":[94,126],"(62%)":[95],"matches":[96],"single":[99,195],"agent":[100,133],"(60%)":[101],"at":[102],"fraction":[104],"strong-model":[107],"token":[108],"usage,":[109],"showing":[110,177],"reasoning":[113],"can":[114],"substitute":[115],"for":[116,232,237],"execution;":[118],"(2)":[119],"(42%)":[127],"performs":[128],"worse":[129],"than":[130],"alone":[134],"(44%),":[135],"demonstrating":[136],"relationship":[140],"requires":[141],"genuine":[143],"capability":[144,185],"gap--structure":[145],"without":[146],"substance":[147],"is":[148,181],"pure":[149],"overhead;":[150],"(3)":[151],"manager's":[153],"value":[154],"lies":[155],"in":[156],"directing,":[157],"not":[158],"merely":[159],"reviewing--a":[160],"minimal":[161],"review-only":[162],"loop":[163],"adds":[164],"just":[165],"2pp":[166],"over":[167],"baseline,":[169],"structured":[171],"planning":[174],"add":[175],"11pp,":[176],"active":[179],"direction":[180],"what":[182],"makes":[183],"gap":[186],"productive;":[187],"(4)":[189],"these":[190],"behaviors":[191],"trace":[192],"root":[196],"cause:":[197],"current":[198,263],"models":[199],"are":[200,259],"trained":[201,228],"as":[202],"monolithic":[203],"agents,":[204],"splitting":[206],"them":[207],"into":[208],"director/worker":[209],"roles":[210],"fights":[211],"their":[212],"training":[213,251,264],"distribution.":[214],"The":[215],"succeeds":[217],"designing":[219],"around":[220],"mismatch--keeping":[222],"each":[223],"close":[225],"its":[227],"mode":[229,257],"(text":[230],"generation":[231],"manager,":[234],"tool":[235],"use":[236],"worker)":[239],"externalizing":[241],"organizational":[242],"structure":[243],"code.":[245],"This":[246],"diagnosis":[247],"points":[248],"concrete":[250],"gaps:":[252],"delegation,":[253],"scoped":[254],"execution,":[255],"switching":[258],"skills":[260],"absent":[261],"data.":[265]},"counts_by_year":[],"updated_date":"2026-03-31T06:07:48.031334","created_date":"2026-03-31T00:00:00"}
