{"id":"https://openalex.org/W7129200700","doi":"https://doi.org/10.48550/arxiv.2602.13691","title":"PhGPO: Pheromone-Guided Policy Optimization for Long-Horizon Tool Planning","display_name":"PhGPO: Pheromone-Guided Policy Optimization for Long-Horizon Tool Planning","publication_year":2026,"publication_date":"2026-02-14","ids":{"openalex":"https://openalex.org/W7129200700","doi":"https://doi.org/10.48550/arxiv.2602.13691"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.13691","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5126255759","display_name":"Yu Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Li, Yu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126205341","display_name":"Guangfeng Cai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cai, Guangfeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126264937","display_name":"Shengtian Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Shengtian","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126182430","display_name":"Han Luo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Han","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126186685","display_name":"Shuo Han","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Han, Shuo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126243894","display_name":"Xu He","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Xu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126265642","display_name":"Dong Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Dong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5126201443","display_name":"Lei Feng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Feng, Lei","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5126255759"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.13500000536441803,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.13500000536441803,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.12600000202655792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.07580000162124634,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/ant-colony-optimization-algorithms","display_name":"Ant colony optimization algorithms","score":0.6039000153541565},{"id":"https://openalex.org/keywords/path","display_name":"Path (computing)","score":0.4336000084877014},{"id":"https://openalex.org/keywords/ant-colony","display_name":"Ant colony","score":0.41499999165534973},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.4023999869823456},{"id":"https://openalex.org/keywords/motion-planning","display_name":"Motion planning","score":0.30790001153945923}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6541000008583069},{"id":"https://openalex.org/C40128228","wikidata":"https://www.wikidata.org/wiki/Q460851","display_name":"Ant colony optimization algorithms","level":2,"score":0.6039000153541565},{"id":"https://openalex.org/C2777735758","wikidata":"https://www.wikidata.org/wiki/Q817765","display_name":"Path (computing)","level":2,"score":0.4336000084877014},{"id":"https://openalex.org/C60891933","wikidata":"https://www.wikidata.org/wiki/Q796575","display_name":"Ant colony","level":3,"score":0.41499999165534973},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.4023999869823456},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3564999997615814},{"id":"https://openalex.org/C81074085","wikidata":"https://www.wikidata.org/wiki/Q366872","display_name":"Motion planning","level":3,"score":0.30790001153945923},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.3003000020980835},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2928999960422516},{"id":"https://openalex.org/C42475967","wikidata":"https://www.wikidata.org/wiki/Q194292","display_name":"Operations research","level":1,"score":0.2881999909877777},{"id":"https://openalex.org/C109718341","wikidata":"https://www.wikidata.org/wiki/Q1385229","display_name":"Metaheuristic","level":2,"score":0.27970001101493835},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.251800000667572}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.13691","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.13691","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.13691","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.13691","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advancements":[1],"in":[2,12],"Large":[3],"Language":[4],"Model":[5],"(LLM)":[6],"agents":[7],"have":[8],"demonstrated":[9],"strong":[10],"capabilities":[11],"executing":[13],"complex":[14],"tasks":[15],"through":[16],"tool":[17,22,145,150],"use.":[18],"However,":[19],"long-horizon":[20,149],"multi-step":[21],"planning":[23],"is":[24,44,47],"challenging,":[25],"because":[26],"the":[27,84,101,123,156],"exploration":[28],"space":[29],"suffers":[30],"from":[31,117],"a":[32,40,111],"combinatorial":[33],"explosion.":[34],"In":[35,66],"this":[36,67],"scenario,":[37],"even":[38],"when":[39],"correct":[41],"tool-use":[42],"path":[43],"found,":[45],"it":[46],"usually":[48],"considered":[49],"an":[50],"immediate":[51],"reward":[52],"for":[53,63],"current":[54],"training,":[55],"which":[56,79,109],"would":[57],"not":[58],"provide":[59],"any":[60],"reusable":[61,76,136],"information":[62],"subsequent":[64],"training.":[65],"paper,":[68],"we":[69,103],"argue":[70],"that":[71,138],"historically":[72,94,143],"successful":[73,95,144],"trajectories":[74,119],"contain":[75],"tool-transition":[77],"patterns,":[78],"can":[80,97],"be":[81,98],"leveraged":[82],"throughout":[83],"whole":[85],"training":[86],"process.":[87],"Inspired":[88],"by":[89,100],"ant":[90],"colony":[91],"optimization":[92,141],"where":[93],"paths":[96],"reflected":[99],"pheromone,":[102],"propose":[104],"Pheromone-Guided":[105],"Policy":[106],"Optimization":[107],"(PhGPO),":[108],"learns":[110],"trajectory-based":[112],"transition":[113],"pattern":[114],"(i.e.,":[115],"pheromone)":[116],"historical":[118],"and":[120,135],"then":[121],"uses":[122],"learned":[124,131],"pheromone":[125,132],"to":[126],"guide":[127],"policy":[128,140],"optimization.":[129],"This":[130],"provides":[133],"explicit":[134],"guidance":[137],"steers":[139],"toward":[142],"transitions,":[146],"thereby":[147],"improving":[148],"planning.":[151],"Comprehensive":[152],"experimental":[153],"results":[154],"demonstrate":[155],"effectiveness":[157],"of":[158],"our":[159],"proposed":[160],"PhGPO.":[161]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-18T00:00:00"}
