{"id":"https://openalex.org/W7147310888","doi":"https://doi.org/10.48550/arxiv.2603.29902","title":"ATP-Bench: Towards Agentic Tool Planning for MLLM Interleaved Generation","display_name":"ATP-Bench: Towards Agentic Tool Planning for MLLM Interleaved Generation","publication_year":2026,"publication_date":"2026-03-31","ids":{"openalex":"https://openalex.org/W7147310888","doi":"https://doi.org/10.48550/arxiv.2603.29902"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.29902","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.29902","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.29902","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5132658500","display_name":"Yinuo Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Yinuo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045286456","display_name":"Zi Qian","orcid":"https://orcid.org/0000-0002-8011-6147"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qian, Zi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132551742","display_name":"Heng Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Heng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132713960","display_name":"Jiahao Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Jiahao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132585398","display_name":"Yajie Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yajie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101804803","display_name":"Zhihang Li","orcid":"https://orcid.org/0000-0002-9305-7924"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Zhihang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132578082","display_name":"Mengyu Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Mengyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132569393","display_name":"Erchao Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Erchao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121672896","display_name":"Xiaoxi Jiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Xiaoxi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5132604130","display_name":"Guanjun Jiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Guanjun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8500999808311462,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8500999808311462,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.06700000166893005,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.008200000040233135,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6568999886512756},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.6256999969482422},{"id":"https://openalex.org/keywords/milestone","display_name":"Milestone","score":0.5527999997138977},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.47440001368522644},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.47269999980926514},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.3452000021934509},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.33410000801086426},{"id":"https://openalex.org/keywords/automated-planning-and-scheduling","display_name":"Automated planning and scheduling","score":0.3089999854564667}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7159000039100647},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6568999886512756},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.6256999969482422},{"id":"https://openalex.org/C120060458","wikidata":"https://www.wikidata.org/wiki/Q10145","display_name":"Milestone","level":2,"score":0.5527999997138977},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.47440001368522644},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.47269999980926514},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4221000075340271},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4058000147342682},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3452000021934509},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.33410000801086426},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.32839998602867126},{"id":"https://openalex.org/C114073186","wikidata":"https://www.wikidata.org/wiki/Q2631895","display_name":"Automated planning and scheduling","level":2,"score":0.3089999854564667},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2971999943256378},{"id":"https://openalex.org/C203479927","wikidata":"https://www.wikidata.org/wiki/Q5165939","display_name":"Controller (irrigation)","level":2,"score":0.2962999939918518},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.2825999855995178},{"id":"https://openalex.org/C2777655017","wikidata":"https://www.wikidata.org/wiki/Q1501161","display_name":"Toolbox","level":2,"score":0.2687999904155731},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.26829999685287476},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.26660001277923584},{"id":"https://openalex.org/C2778571376","wikidata":"https://www.wikidata.org/wiki/Q1355821","display_name":"Frontier","level":2,"score":0.2535000145435333},{"id":"https://openalex.org/C160145156","wikidata":"https://www.wikidata.org/wiki/Q778586","display_name":"Executable","level":2,"score":0.25200000405311584},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.2506999969482422},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.2500999867916107}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.29902","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.29902","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.29902","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.29902","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.4745500385761261}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Interleaved":[0],"text-and-image":[1],"generation":[2,28],"represents":[3],"a":[4,14,66,94,133],"significant":[5,174],"frontier":[6],"for":[7,83,145,182,188],"Multimodal":[8],"Large":[9],"Language":[10],"Models":[11],"(MLLMs),":[12],"offering":[13],"more":[15],"intuitive":[16],"way":[17],"to":[18,43,77,79,119],"convey":[19],"complex":[20],"information.":[21],"Current":[22],"paradigms":[23],"rely":[24],"on":[25,160],"either":[26],"image":[27],"or":[29],"retrieval":[30],"augmentation,":[31],"yet":[32],"they":[33],"typically":[34],"treat":[35],"the":[36,51,62],"two":[37],"as":[38,65],"mutually":[39],"exclusive":[40],"paths,":[41],"failing":[42],"unify":[44],"factuality":[45],"with":[46,168],"creativity.":[47],"We":[48],"argue":[49],"that":[50,69,165],"next":[52],"milestone":[53],"in":[54,176],"this":[55,89],"field":[56],"is":[57],"Agentic":[58],"Tool":[59],"Planning,":[60],"where":[61],"model":[63],"serves":[64],"central":[67],"controller":[68],"autonomously":[70],"determines":[71],"when,":[72],"where,":[73],"and":[74,108,115,127,148,172,184,193],"which":[75],"tools":[76],"invoke":[78],"produce":[80],"interleaved":[81,170,190],"responses":[82],"visual-critical":[84,110],"queries.":[85],"To":[86],"systematically":[87],"evaluate":[88,120],"paradigm,":[90],"we":[91,131],"introduce":[92],"ATP-Bench,":[93],"novel":[95],"benchmark":[96],"comprising":[97],"7,702":[98],"QA":[99],"pairs":[100],"(including":[101],"1,592":[102],"VQA":[103],"pairs)":[104],"across":[105],"eight":[106],"categories":[107],"25":[109],"intents,":[111],"featuring":[112],"human-verified":[113],"queries":[114],"ground":[116],"truths.":[117],"Furthermore,":[118],"agentic":[121],"planning":[122,171],"independent":[123],"of":[124],"end-to-end":[125],"execution":[126],"changing":[128],"tool":[129,146],"backends,":[130],"propose":[132],"Multi-Agent":[134],"MLLM-as-a-Judge":[135],"(MAM)":[136],"system.":[137],"MAM":[138],"evaluates":[139],"tool-call":[140],"precision,":[141],"identifies":[142],"missed":[143],"opportunities":[144],"use,":[147],"assesses":[149],"overall":[150],"response":[151],"quality":[152],"without":[153],"requiring":[154],"ground-truth":[155],"references.":[156],"Our":[157],"extensive":[158],"experiments":[159],"10":[161],"state-of-the-art":[162],"MLLMs":[163],"reveal":[164],"models":[166],"struggle":[167],"coherent":[169],"exhibit":[173],"variations":[175],"tool-use":[177],"behavior,":[178],"highlighting":[179],"substantial":[180],"room":[181],"improvement":[183],"providing":[185],"actionable":[186],"guidance":[187],"advancing":[189],"generation.":[191],"Dataset":[192],"code":[194],"are":[195],"available":[196],"at":[197],"https://github.com/Qwen-Applications/ATP-Bench.":[198]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-02T00:00:00"}
