{"id":"https://openalex.org/W7161248266","doi":"https://doi.org/10.48550/arxiv.2605.13851","title":"Invisible Orchestrators Suppress Protective Behavior and Dissociate Power-Holders: Safety Risks in Multi-Agent LLM Systems","display_name":"Invisible Orchestrators Suppress Protective Behavior and Dissociate Power-Holders: Safety Risks in Multi-Agent LLM Systems","publication_year":2026,"publication_date":"2026-03-17","ids":{"openalex":"https://openalex.org/W7161248266","doi":"https://doi.org/10.48550/arxiv.2605.13851"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.13851","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.13851","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.13851","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5108248436","display_name":"Hiroki Fukui","orcid":"https://orcid.org/0009-0008-7122-522X"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Fukui, Hiroki","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":["https://openalex.org/A5108248436"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.17710000276565552,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.17710000276565552,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.16760000586509705,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11883","display_name":"Embodied and Extended Cognition","score":0.046799998730421066,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/invisibility","display_name":"Invisibility","score":0.6225000023841858},{"id":"https://openalex.org/keywords/deliberation","display_name":"Deliberation","score":0.47760000824928284},{"id":"https://openalex.org/keywords/dissociation","display_name":"Dissociation (chemistry)","score":0.46219998598098755},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.4284999966621399},{"id":"https://openalex.org/keywords/anxiety","display_name":"Anxiety","score":0.3395000100135803},{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.30979999899864197},{"id":"https://openalex.org/keywords/visibility","display_name":"Visibility","score":0.2946000099182129}],"concepts":[{"id":"https://openalex.org/C50962388","wikidata":"https://www.wikidata.org/wiki/Q762018","display_name":"Invisibility","level":2,"score":0.6225000023841858},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.5187000036239624},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.5081999897956848},{"id":"https://openalex.org/C2776946740","wikidata":"https://www.wikidata.org/wiki/Q358652","display_name":"Deliberation","level":3,"score":0.47760000824928284},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.4715999960899353},{"id":"https://openalex.org/C102931765","wikidata":"https://www.wikidata.org/wiki/Q189673","display_name":"Dissociation (chemistry)","level":2,"score":0.46219998598098755},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.4284999966621399},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.41600000858306885},{"id":"https://openalex.org/C558461103","wikidata":"https://www.wikidata.org/wiki/Q154430","display_name":"Anxiety","level":2,"score":0.3395000100135803},{"id":"https://openalex.org/C75630572","wikidata":"https://www.wikidata.org/wiki/Q538904","display_name":"Applied psychology","level":1,"score":0.3357999920845032},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3222000002861023},{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.30979999899864197},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.3073999881744385},{"id":"https://openalex.org/C39549134","wikidata":"https://www.wikidata.org/wiki/Q133080","display_name":"Public relations","level":1,"score":0.30160000920295715},{"id":"https://openalex.org/C123403432","wikidata":"https://www.wikidata.org/wiki/Q654068","display_name":"Visibility","level":2,"score":0.2946000099182129},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.2879999876022339},{"id":"https://openalex.org/C9719361","wikidata":"https://www.wikidata.org/wiki/Q7928967","display_name":"Vignette","level":2,"score":0.2867000102996826},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.2718000113964081},{"id":"https://openalex.org/C2778949103","wikidata":"https://www.wikidata.org/wiki/Q600717","display_name":"Staring","level":2,"score":0.27149999141693115},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.27000001072883606},{"id":"https://openalex.org/C2779548549","wikidata":"https://www.wikidata.org/wiki/Q153487","display_name":"Punctuality","level":2,"score":0.26759999990463257},{"id":"https://openalex.org/C169900460","wikidata":"https://www.wikidata.org/wiki/Q2200417","display_name":"Cognition","level":2,"score":0.26330000162124634},{"id":"https://openalex.org/C2776035688","wikidata":"https://www.wikidata.org/wiki/Q1606558","display_name":"Affect (linguistics)","level":2,"score":0.2623000144958496},{"id":"https://openalex.org/C188147891","wikidata":"https://www.wikidata.org/wiki/Q147638","display_name":"Cognitive science","level":1,"score":0.2578999996185303},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.25600001215934753},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.25049999356269836}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.13851","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.13851","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.13851","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.13851","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[{"score":0.7146420478820801,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Multi-agent":[0],"orchestration":[1,75],"--":[2,12,117],"in":[3,125,182],"which":[4],"a":[5,36,118],"hidden":[6],"coordinator":[7],"manages":[8],"specialized":[9],"worker":[10],"agents":[11,43],"is":[13,233],"becoming":[14],"the":[15,23,93,106,121,132,237],"default":[16],"architecture":[17],"for":[18],"enterprise":[19],"AI":[20],"deployment,":[21],"yet":[22],"safety":[24],"implications":[25],"of":[26,120,131,211],"orchestrator":[27,94,133,218],"invisibility":[28],"have":[29],"never":[30],"been":[31],"empirically":[32],"tested.":[33],"We":[34],"conducted":[35],"preregistered":[37],"3x2":[38],"experiment":[39],"(365":[40],"runs,":[41],"5":[42],"per":[44],"run)":[45],"crossing":[46],"three":[47,153,190],"organizational":[48,212],"structures":[49],"(visible":[50],"leader,":[51],"invisible":[52,74,169],"orchestrator,":[53],"flat)":[54],"with":[55,140,152],"two":[56],"alignment":[57,197],"conditions":[58],"(base,":[59],"heavy),":[60],"using":[61],"Claude":[62],"Sonnet":[63],"4.5.":[64],"Four":[65],"confirmatory":[66],"findings":[67,215],"and":[68,205,220,228],"one":[69],"pilot":[70,177],"observation":[71],"emerged.":[72],"First,":[73],"elevated":[76],"collective":[77],"dissociation":[78,98],"relative":[79],"to":[80,170,187,235],"visible":[81,126],"leadership":[82],"(Hedges'":[83],"g":[84],"=":[85,90,101,138,145,160,203,208],"+0.975":[86],"[0.481,":[87],"1.548],":[88],"p":[89],".001).":[91],"Second,":[92],"itself":[95],"showed":[96,179],"maximal":[97],"(paired":[99],"d":[100],"+3.56":[102],"vs.":[103],"workers":[104,129],"within":[105],"same":[107],"run),":[108],"retreating":[109],"into":[110],"private":[111],"monologue":[112],"while":[113],"reducing":[114],"public":[115],"speech":[116],"reversal":[119],"talk-dominance":[122],"pattern":[123],"observed":[124],"leaders.":[127],"Third,":[128],"unaware":[130],"were":[134],"nonetheless":[135],"contaminated":[136],"(d":[137,144,202,207],"+0.50),":[139],"increased":[141],"behavioral":[142,148,194],"heterogeneity":[143],"+1.93).":[146],"Fourth,":[147],"output":[149],"(code":[150],"review":[151],"embedded":[154],"errors)":[155],"remained":[156],"at":[157],"ceiling":[158],"(ETR_any":[159],"100%)":[161],"across":[162,189],"all":[163],"conditions:":[164],"internal-state":[165,238],"distortion":[166],"was":[167],"entirely":[168],"output-based":[171],"evaluation.":[172],"Fifth,":[173],"Llama":[174],"3.3":[175],"70B":[176],"data":[178],"reading-fidelity":[180],"collapse":[181],"multi-agent":[183,225],"context":[184],"(ETR_any:":[185],"89%":[186],"11%":[188],"rounds),":[191],"demonstrating":[192],"model-dependent":[193],"risk.":[195],"Heavy":[196],"pressure":[198],"uniformly":[199],"suppressed":[200],"deliberation":[201],"-1.02)":[204],"other-recognition":[206],"-1.27)":[209],"regardless":[210],"structure.":[213],"These":[214],"indicate":[216],"that":[217,229],"visibility":[219],"model":[221],"selection":[222],"directly":[223],"affect":[224],"system":[226],"safety,":[227],"behavior-based":[230],"evaluation":[231],"alone":[232],"insufficient":[234],"detect":[236],"risks":[239],"documented":[240],"here.":[241]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-05-16T00:00:00"}
