{"id":"https://openalex.org/W7161732709","doi":"https://doi.org/10.48550/arxiv.2605.17558","title":"Firefly: Illuminating Large-Scale Verified Tool-Call Data Generation from Real APIs","display_name":"Firefly: Illuminating Large-Scale Verified Tool-Call Data Generation from Real APIs","publication_year":2026,"publication_date":"2026-05-17","ids":{"openalex":"https://openalex.org/W7161732709","doi":"https://doi.org/10.48550/arxiv.2605.17558"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.17558","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.17558","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.17558","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136465946","display_name":"Yuxuan Lu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Yuxuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136476797","display_name":"Ziyi Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Ziyi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136457652","display_name":"Yingzhou Lu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Yingzhou","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078860840","display_name":"Yisi Sang","orcid":"https://orcid.org/0000-0002-8876-7542"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sang, Yisi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006496692","display_name":"Jiri Gesi","orcid":"https://orcid.org/0000-0002-1188-2921"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gesi, Jiri","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136471489","display_name":"Xianfeng Tang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Xianfeng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100744377","display_name":"Yimeng Zhang","orcid":"https://orcid.org/0000-0002-2967-1516"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yimeng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136494789","display_name":"Zhenwei Dai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dai, Zhenwei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136481585","display_name":"Hui Liu","orcid":"https://orcid.org/0000-0001-8245-6025"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Hui","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130230456","display_name":"Hanqing Lu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Hanqing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058483710","display_name":"Chen Luo","orcid":"https://orcid.org/0000-0003-4339-0355"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Chen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136479202","display_name":"Qi He","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Qi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136489187","display_name":"Benoit Dumoulin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dumoulin, Benoit","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136468176","display_name":"Jing Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Jing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5062817658","display_name":"Dakuo Wang","orcid":"https://orcid.org/0000-0001-9371-9441"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Dakuo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":15,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.11819999665021896,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.11819999665021896,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13197","display_name":"Spreadsheets and End-User Computing","score":0.06449999660253525,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.06369999796152115,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.692799985408783},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6507999897003174},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.48969998955726624},{"id":"https://openalex.org/keywords/server","display_name":"Server","score":0.46540001034736633},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4424999952316284},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.43849998712539673},{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.43560001254081726},{"id":"https://openalex.org/keywords/verifiable-secret-sharing","display_name":"Verifiable secret sharing","score":0.40049999952316284},{"id":"https://openalex.org/keywords/abstraction","display_name":"Abstraction","score":0.3799999952316284},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.36910000443458557}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8144999742507935},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.692799985408783},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6507999897003174},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.48969998955726624},{"id":"https://openalex.org/C93996380","wikidata":"https://www.wikidata.org/wiki/Q44127","display_name":"Server","level":2,"score":0.46540001034736633},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4424999952316284},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.43849998712539673},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.43560001254081726},{"id":"https://openalex.org/C85847156","wikidata":"https://www.wikidata.org/wiki/Q59015987","display_name":"Verifiable secret sharing","level":3,"score":0.40049999952316284},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.3799999952316284},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.36910000443458557},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.36579999327659607},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.35749998688697815},{"id":"https://openalex.org/C63000827","wikidata":"https://www.wikidata.org/wiki/Q3080428","display_name":"Software portability","level":2,"score":0.35530000925064087},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.35429999232292175},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3359000086784363},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.33230000734329224},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.32710000872612},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.3264000117778778},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.32339999079704285},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.314300000667572},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.31049999594688416},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3041999936103821},{"id":"https://openalex.org/C168065819","wikidata":"https://www.wikidata.org/wiki/Q845566","display_name":"Debugging","level":2,"score":0.298799991607666},{"id":"https://openalex.org/C3020493868","wikidata":"https://www.wikidata.org/wiki/Q55631277","display_name":"Real world data","level":2,"score":0.2921000123023987},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.29100000858306885},{"id":"https://openalex.org/C110251889","wikidata":"https://www.wikidata.org/wiki/Q1569697","display_name":"Model checking","level":2,"score":0.2881999909877777},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.28519999980926514},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.2775000035762787},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.27649998664855957},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.27559998631477356},{"id":"https://openalex.org/C186594467","wikidata":"https://www.wikidata.org/wiki/Q1429176","display_name":"Flooding (psychology)","level":2,"score":0.27559998631477356},{"id":"https://openalex.org/C191287063","wikidata":"https://www.wikidata.org/wiki/Q543281","display_name":"Glitch","level":3,"score":0.2685999870300293},{"id":"https://openalex.org/C137800194","wikidata":"https://www.wikidata.org/wiki/Q11713455","display_name":"Interpolation (computer graphics)","level":3,"score":0.266400009393692},{"id":"https://openalex.org/C62230096","wikidata":"https://www.wikidata.org/wiki/Q275969","display_name":"Crowdsourcing","level":2,"score":0.2646999955177307},{"id":"https://openalex.org/C2781020372","wikidata":"https://www.wikidata.org/wiki/Q533093","display_name":"On the fly","level":2,"score":0.26339998841285706},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.2590000033378601},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.25060001015663147}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.17558","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.17558","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.17558","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.17558","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Training":[0],"tool-calling":[1,179],"agents":[2],"requires":[3],"large-scale":[4],"trajectory":[5],"data":[6,39],"with":[7,161],"verifiable":[8],"labels,":[9],"yet":[10],"existing":[11],"approaches":[12],"either":[13],"synthesize":[14,77],"environments":[15],"that":[16,126],"diverge":[17],"from":[18,40,80],"real":[19,70],"API":[20],"behavior":[21],"or":[22],"generate":[23],"tasks":[24,57,78,150],"without":[25],"ground-truth":[26],"outcomes":[27],"for":[28,35],"verification.":[29],"We":[30],"present":[31],"FireFly,":[32],"a":[33,66,100,123],"pipeline":[34,146],"generating":[36,56],"verified":[37,149],"tool-call":[38],"real-world":[41,93],"MCP":[42],"servers.":[43],"Our":[44],"key":[45],"insight":[46],"is":[47],"to":[48,107],"invert":[49],"the":[50,90],"standard":[51],"synthesis":[52],"pipeline:":[53],"rather":[54],"than":[55],"and":[58,104,131,136,141,154,174,184],"hoping":[59],"they":[60],"are":[61],"solvable,":[62],"we":[63,98,121],"first":[64],"let":[65],"strong":[67],"LLM":[68],"explore":[69],"APIs":[71],"along":[72],"graph-guided":[73],"DAG":[74],"structures,":[75],"then":[76],"backward":[79],"observed":[81],"outcomes,":[82],"guaranteeing":[83],"label":[84],"correctness":[85],"by":[86],"construction.":[87],"To":[88,114],"handle":[89],"scale":[91],"of":[92],"tool":[94,102],"spaces":[95],"(${\\sim}$1,000":[96],"tools),":[97],"build":[99],"pairwise":[101],"graph":[103],"sample":[105],"sub-DAGs":[106],"focus":[108],"exploration":[109,129],"on":[110,163,169,177],"semantically":[111],"coherent":[112],"workflows.":[113],"address":[115],"environment":[116],"drift":[117],"in":[118],"live":[119],"APIs,":[120],"construct":[122],"retrieval-augmented":[124],"simulator":[125],"caches":[127],"all":[128],"results":[130],"replays":[132],"them":[133],"during":[134],"training":[135],"evaluation,":[137],"enabling":[138],"fully":[139],"offline":[140],"reproducible":[142],"RL.":[143],"Applying":[144],"this":[145],"yields":[147],"5,144":[148],"spanning":[151],"240":[152],"servers":[153],"993":[155],"tools.":[156],"A":[157],"4B-parameter":[158],"model":[159],"trained":[160],"GRPO":[162],"FireFly":[164],"matches":[165],"Claude":[166],"Sonnet":[167],"4.6":[168],"our":[170],"held-out":[171],"test":[172],"set":[173],"shows":[175],"improvements":[176],"multiple":[178],"benchmarks":[180],"including":[181],"Tau2-Bench,":[182],"MCPMark,":[183],"MCP-Atlas.":[185]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-20T00:00:00"}
