{"id":"https://openalex.org/W7160834434","doi":"https://doi.org/10.48550/arxiv.2605.07073","title":"TeamBench: Evaluating Agent Coordination under Enforced Role Separation","display_name":"TeamBench: Evaluating Agent Coordination under Enforced Role Separation","publication_year":2026,"publication_date":"2026-05-08","ids":{"openalex":"https://openalex.org/W7160834434","doi":"https://doi.org/10.48550/arxiv.2605.07073"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.07073","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.07073","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.07073","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5135892276","display_name":"Yubin Kim","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Yubin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135880149","display_name":"Chanwoo Park","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Park, Chanwoo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005853929","display_name":"\uae40\ud0dc\ud55c","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Taehan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135836073","display_name":"Eugene Park","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Park, Eugene","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074595426","display_name":"Samuel Schmidgall","orcid":"https://orcid.org/0000-0001-8192-9337"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Schmidgall, Samuel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004440882","display_name":"Salman Rahman","orcid":"https://orcid.org/0000-0003-0944-4313"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rahman, Salman","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111186642","display_name":"Chunjong Park","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Park, Chunjong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135841535","display_name":"Cynthia Breazeal","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Breazeal, Cynthia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135838025","display_name":"Xin Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Xin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033846851","display_name":"Hamid Palangi","orcid":"https://orcid.org/0000-0003-2912-4579"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Palangi, Hamid","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135857631","display_name":"Hae Won Park","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Park, Hae Won","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5122791235","display_name":"Daniel McDuff","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"McDuff, Daniel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":12,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.4952999949455261,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.4952999949455261,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10525","display_name":"Human-Automation Interaction and Safety","score":0.0868000015616417,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.03420000150799751,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.7807000279426575},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7799999713897705},{"id":"https://openalex.org/keywords/certification","display_name":"Certification","score":0.5015000104904175},{"id":"https://openalex.org/keywords/workspace","display_name":"Workspace","score":0.383899986743927},{"id":"https://openalex.org/keywords/work","display_name":"Work (physics)","score":0.3637000024318695},{"id":"https://openalex.org/keywords/separation","display_name":"Separation (statistics)","score":0.30399999022483826}],"concepts":[{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.7807000279426575},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7799999713897705},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7199000120162964},{"id":"https://openalex.org/C46304622","wikidata":"https://www.wikidata.org/wiki/Q374814","display_name":"Certification","level":2,"score":0.5015000104904175},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39469999074935913},{"id":"https://openalex.org/C58581272","wikidata":"https://www.wikidata.org/wiki/Q12741163","display_name":"Workspace","level":3,"score":0.383899986743927},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.3637000024318695},{"id":"https://openalex.org/C2776061190","wikidata":"https://www.wikidata.org/wiki/Q7451805","display_name":"Separation (statistics)","level":2,"score":0.30399999022483826},{"id":"https://openalex.org/C2780385302","wikidata":"https://www.wikidata.org/wiki/Q367158","display_name":"Protocol (science)","level":3,"score":0.2849999964237213},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.27649998664855957},{"id":"https://openalex.org/C41550386","wikidata":"https://www.wikidata.org/wiki/Q529909","display_name":"Multi-agent system","level":2,"score":0.27000001072883606},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2612999975681305},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.2565999925136566}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.07073","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.07073","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.07073","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.07073","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.8020013570785522}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Agent":[0],"systems":[1],"often":[2,196],"decompose":[3],"a":[4,25,47],"task":[5,51,189],"across":[6,75,210],"multiple":[7],"roles,":[8,80],"but":[9,107,156],"these":[10],"roles":[11],"are":[12],"typically":[13],"specified":[14],"by":[15,20],"prompts":[16],"rather":[17],"than":[18],"enforced":[19],"access":[21],"controls.":[22],"Without":[23],"enforcement,":[24],"team":[26],"pass":[27,105,181],"rate":[28,182],"can":[29,85],"mask":[30],"whether":[31,36],"agents":[32,154,160,195],"actually":[33],"coordinated":[34],"or":[35],"one":[37],"role":[38,64,84,171],"effectively":[39],"did":[40],"another":[41],"role's":[42],"work.":[43],"We":[44],"present":[45],"TeamBench,":[46],"benchmark":[48,176],"with":[49,194],"851":[50],"templates":[52],"and":[53,72,78,93,99,134,201],"931":[54],"seeded":[55],"instances":[56],"for":[57],"evaluating":[58],"agent":[59],"coordination":[60],"under":[61,168],"operating":[62],"system-enforced":[63],"separation.":[65],"TeamBench":[66],"separates":[67],"specification":[68],"access,":[69],"workspace":[70],"editing,":[71],"final":[73,96],"certification":[74],"Planner,":[76],"Executor,":[77],"Verifier":[79],"so":[81],"that":[82,129,174,180],"no":[83],"read":[86],"the":[87,91,95,116,121,131,136,143,169,188],"full":[88],"requirements,":[89],"modify":[90],"workspace,":[92],"certify":[94],"answer.":[97],"Prompt-only":[98],"sandbox-enforced":[100],"teams":[101,203],"reach":[102],"statistically":[103],"indistinguishable":[104],"rates,":[106],"prompt-only":[108],"runs":[109],"produce":[110],"3.6":[111],"times":[112],"more":[113,205],"cases":[114],"where":[115],"verifier":[117,137],"attempts":[118],"to":[119],"edit":[120],"executor's":[122],"code.":[123],"Verifiers":[124],"approve":[125],"49%":[126],"of":[127],"submissions":[128],"fail":[130],"deterministic":[132],"grader,":[133],"removing":[135],"improves":[138],"mean":[139],"partial":[140],"score":[141],"in":[142],"ablation.":[144],"Team":[145],"value":[146],"is":[147],"also":[148],"conditional.":[149],"Teams":[150],"benefit":[151],"when":[152,158],"single":[153,159],"struggle,":[155],"hurt":[157],"already":[161],"perform":[162],"well.":[163],"A":[164],"40-session":[165],"human":[166,191,202],"study":[167],"same":[170],"separation":[172],"shows":[173],"our":[175],"exposes":[177],"interaction":[178],"patterns":[179],"misses.":[183],"Solo":[184],"participants":[185,192],"work":[186],"through":[187],"directly,":[190],"paired":[193],"collapse":[197],"into":[198],"quick":[199],"approval,":[200],"spend":[204],"effort":[206],"coordinating":[207],"missing":[208],"information":[209],"roles.":[211]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-12T00:00:00"}
