{"id":"https://openalex.org/W7138444851","doi":"https://doi.org/10.1609/aaai.v40i19.38700","title":"Multi-Agent Pointer Transformer: Seq-to-Seq Reinforcement Learning for Multi-Vehicle Dynamic Pickup-Delivery Problems","display_name":"Multi-Agent Pointer Transformer: Seq-to-Seq Reinforcement Learning for Multi-Vehicle Dynamic Pickup-Delivery Problems","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138444851","doi":"https://doi.org/10.1609/aaai.v40i19.38700"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i19.38700","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i19.38700","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38700/42662","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38700/42662","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125708660","display_name":"Zengyu Zou","orcid":null},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zengyu Zou","raw_affiliation_strings":["School of Computer Science and Engineering, Beihang University, Beijing, China\nMOE Engineering Research Center of Advanced Computer Application Technology, Beihang University, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Beihang University, Beijing, China\nMOE Engineering Research Center of Advanced Computer Application Technology, Beihang University, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129686737","display_name":"Jingyuan Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingyuan Wang","raw_affiliation_strings":["School of Computer Science and Engineering, Beihang University, Beijing, China\nSchool of Economics and Management, Beihang University, Beijing, China\nMIIT Key Laboratory of Data Intelligence and Management, Beihang University, Beijing, China\nMOE Engineering Research Center of Advanced Computer Application Technology, Beihang University, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Beihang University, Beijing, China\nSchool of Economics and Management, Beihang University, Beijing, China\nMIIT Key Laboratory of Data Intelligence and Management, Beihang University, Beijing, China\nMOE Engineering Research Center of Advanced Computer Application Technology, Beihang University, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129653995","display_name":"Yixuan Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yixuan Huang","raw_affiliation_strings":["School of Computer Science and Engineering, Beihang University, Beijing, China\nMOE Engineering Research Center of Advanced Computer Application Technology, Beihang University, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Beihang University, Beijing, China\nMOE Engineering Research Center of Advanced Computer Application Technology, Beihang University, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5109361866","display_name":"Junjie Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junjie Wu","raw_affiliation_strings":["School of Economics and Management, Beihang University, Beijing, China\nMIIT Key Laboratory of Data Intelligence and Management, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Economics and Management, Beihang University, Beijing, China\nMIIT Key Laboratory of Data Intelligence and Management, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5125708660"],"corresponding_institution_ids":["https://openalex.org/I82880672"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.75555556,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"19","first_page":"16593","last_page":"16601"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10567","display_name":"Vehicle Routing Optimization Methods","score":0.8047000169754028,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10567","display_name":"Vehicle Routing Optimization Methods","score":0.8047000169754028,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11942","display_name":"Transportation and Mobility Innovations","score":0.061500001698732376,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10524","display_name":"Traffic control and management","score":0.030899999663233757,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.690500020980835},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5647000074386597},{"id":"https://openalex.org/keywords/pointer","display_name":"Pointer (user interface)","score":0.4449999928474426},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.4359000027179718},{"id":"https://openalex.org/keywords/structured-prediction","display_name":"Structured prediction","score":0.42879998683929443},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.4212000072002411},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.376800000667572},{"id":"https://openalex.org/keywords/computational-complexity-theory","display_name":"Computational complexity theory","score":0.375900000333786}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7483000159263611},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.690500020980835},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5647000074386597},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5242000222206116},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.47690001130104065},{"id":"https://openalex.org/C150202949","wikidata":"https://www.wikidata.org/wiki/Q107602","display_name":"Pointer (user interface)","level":2,"score":0.4449999928474426},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.4359000027179718},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.42879998683929443},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.4212000072002411},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.376800000667572},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.375900000333786},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.3725999891757965},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.3555999994277954},{"id":"https://openalex.org/C113954288","wikidata":"https://www.wikidata.org/wiki/Q186885","display_name":"Timestamp","level":2,"score":0.33219999074935913},{"id":"https://openalex.org/C177769412","wikidata":"https://www.wikidata.org/wiki/Q278090","display_name":"Prior probability","level":3,"score":0.31369999051094055},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.30709999799728394},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.2939999997615814},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.2777999937534332},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.2687999904155731},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.26260000467300415},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.2624000012874603},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.2551000118255615}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i19.38700","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i19.38700","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38700/42662","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i19.38700","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i19.38700","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/38700/42662","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.7503575086593628}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7138444851.pdf","grobid_xml":"https://content.openalex.org/works/W7138444851.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"This":[0],"paper":[1],"addresses":[2],"the":[3,35,120,162],"cooperative":[4],"Multi-Vehicle":[5],"Dynamic":[6],"Pickup":[7],"and":[8,15,39,62,149,188],"Delivery":[9],"Problem":[10],"with":[11,136],"Stochastic":[12],"Requests":[13],"(MVDPDPSR)":[14],"proposes":[16],"an":[17,32,146],"end-to-end":[18],"centralized":[19],"decision-making":[20,164],"framework":[21],"based":[22],"on":[23,173],"sequence-to-sequence,":[24],"named":[25],"Multi-Agent":[26],"Pointer":[27,138],"Transformer":[28,126,134],"(MAPT).":[29],"MVDPDPSR":[30],"is":[31,111],"extension":[33],"of":[34,186],"vehicle":[36],"routing":[37],"problem":[38],"a":[40,125,133,137,151],"spatio-temporal":[41],"system":[42],"optimization":[43],"problem,":[44],"widely":[45],"applied":[46],"in":[47,59,145,184],"scenarios":[48],"such":[49],"as":[50],"on-demand":[51],"delivery.":[52],"Classical":[53],"operations":[54,197],"research":[55,198],"methods":[56,74,183],"face":[57],"bottlenecks":[58],"computational":[60,191],"complexity":[61],"time":[63,192],"efficiency":[64],"when":[65],"handling":[66],"large-scale":[67],"dynamic":[68],"problems.":[69],"Although":[70],"existing":[71,181],"reinforcement":[72],"learning":[73],"have":[75],"achieved":[76],"some":[77],"progress,":[78],"they":[79],"still":[80],"encounter":[81],"several":[82],"challenges:":[83],"1)":[84],"Independent":[85],"decoding":[86],"across":[87],"multiple":[88],"vehicles":[89],"fails":[90],"to":[91,102,128,140,155,168,195],"model":[92],"joint":[93,108,142],"action":[94,109,143],"distributions;":[95],"2)":[96],"The":[97,107],"feature":[98],"extraction":[99],"network":[100],"struggles":[101],"capture":[103,156],"inter-entity":[104,157],"relationships;":[105],"3)":[106],"space":[110],"exponentially":[112],"large.":[113],"To":[114],"address":[115],"these":[116],"issues,":[117],"we":[118,160],"designed":[119],"MAPT":[121,178],"framework,":[122],"which":[123],"employs":[124],"Encoder":[127],"extract":[129],"entity":[130],"representations,":[131],"combines":[132],"Decoder":[135],"Network":[139],"generate":[141],"sequences":[144],"AutoRegressive":[147],"manner,":[148],"introduces":[150],"Relation-Aware":[152],"Attention":[153],"module":[154],"relationships.":[158],"Additionally,":[159],"guide":[161],"model's":[163],"using":[165],"informative":[166],"priors":[167],"facilitate":[169],"effective":[170],"exploration.":[171],"Experiments":[172],"8":[174],"datasets":[175],"demonstrate":[176],"that":[177],"significantly":[179],"outperforms":[180],"baseline":[182],"terms":[185],"performance":[187],"exhibits":[189],"substantial":[190],"advantages":[193],"compared":[194],"classical":[196],"methods.":[199]},"counts_by_year":[],"updated_date":"2026-03-20T20:47:17.329874","created_date":"2026-03-18T00:00:00"}
