{"id":"https://openalex.org/W7131396888","doi":"https://doi.org/10.48550/arxiv.2602.20730","title":"Rethinking Efficiency in Neural Combinatorial Optimization: Batched Preference Optimization with Mamba","display_name":"Rethinking Efficiency in Neural Combinatorial Optimization: Batched Preference Optimization with Mamba","publication_year":2026,"publication_date":"2026-02-24","ids":{"openalex":"https://openalex.org/W7131396888","doi":"https://doi.org/10.48550/arxiv.2602.20730"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.20730","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5126807432","display_name":"Zhenxing Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Xu, Zhenxing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104254022","display_name":"Zeyuan Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Zeyuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126787408","display_name":"Weidong Bao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bao, Weidong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126853871","display_name":"Hui Yan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Yan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126833031","display_name":"Yan Zheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Ji","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5111852918","display_name":"Ji Guang Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cao, Zhiguang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5126807432"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11596","display_name":"Constraint Satisfaction and Optimization","score":0.36010000109672546,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11596","display_name":"Constraint Satisfaction and Optimization","score":0.36010000109672546,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10848","display_name":"Advanced Multi-Objective Optimization Algorithms","score":0.2858000099658966,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10100","display_name":"Metaheuristic Optimization Algorithms Research","score":0.05209999904036522,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5601999759674072},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.483599990606308},{"id":"https://openalex.org/keywords/bootstrapping","display_name":"Bootstrapping (finance)","score":0.436599999666214},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.4088999927043915},{"id":"https://openalex.org/keywords/mechanism","display_name":"Mechanism (biology)","score":0.3668999969959259},{"id":"https://openalex.org/keywords/optimization-problem","display_name":"Optimization problem","score":0.3375000059604645},{"id":"https://openalex.org/keywords/offline-learning","display_name":"Offline learning","score":0.3237999975681305}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7634999752044678},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6051999926567078},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5601999759674072},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5066999793052673},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.483599990606308},{"id":"https://openalex.org/C207609745","wikidata":"https://www.wikidata.org/wiki/Q4944086","display_name":"Bootstrapping (finance)","level":2,"score":0.436599999666214},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.4088999927043915},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.3668999969959259},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.3375000059604645},{"id":"https://openalex.org/C2780490138","wikidata":"https://www.wikidata.org/wiki/Q7079636","display_name":"Offline learning","level":3,"score":0.3237999975681305},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.3222000002861023},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3158000111579895},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.310699999332428},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.2994999885559082},{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.29339998960494995},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2800000011920929},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.27959999442100525},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.27149999141693115},{"id":"https://openalex.org/C2780102126","wikidata":"https://www.wikidata.org/wiki/Q10928179","display_name":"Online and offline","level":2,"score":0.26980000734329224}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.20730","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.20730","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.20730","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.20730","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,68,151],"study":[1],"efficiency":[2],"as":[3],"a":[4,25,74],"first-class":[5],"objective":[6],"in":[7,146],"Neural":[8],"Combinatorial":[9],"Optimization":[10,57],"(NCO)":[11],"and":[12,53,85,102,119,128,149,159],"present":[13],"ECO,":[14],"an":[15],"efficient":[16],"learning":[17,71],"framework":[18],"that":[19,78],"combines":[20],"batched":[21,60],"preference":[22,100,115],"optimization":[23],"with":[24,35,73],"Mamba":[26,76],"backbone.":[27],"Instead":[28],"of":[29,162],"tightly":[30],"interleaving":[31],"every":[32],"policy":[33],"update":[34],"on-policy":[36],"rollouts,":[37],"ECO":[38,130],"decouples":[39],"trajectory":[40],"generation":[41],"from":[42],"gradient":[43],"updates":[44],"through":[45],"two":[46],"stages:":[47],"supervised":[48],"warm-up":[49],"on":[50,59,82,155],"pre-computed":[51],"solutions":[52],"iterative":[54,104],"Direct":[55],"Preference":[56],"(DPO)":[58],"candidate":[61],"sets":[62],"generated":[63],"by":[64],"the":[65,132,137,160],"current":[66],"policy.":[67],"pair":[69],"this":[70],"pipeline":[72],"mixed":[75],"encoder-decoder":[77],"reduces":[79],"memory":[80,147,156],"growth":[81],"long":[83],"sequences":[84],"improves":[86],"hardware":[87],"utilization.":[88],"A":[89],"local-search-guided":[90],"bootstrapping":[91],"strategy":[92],"is":[93,109,120],"further":[94],"used":[95,111],"during":[96,117],"training":[97,118],"to":[98,112],"widen":[99],"margins":[101],"stabilize":[103],"improvement.":[105],"Importantly,":[106],"local":[107],"search":[108],"only":[110],"construct":[113],"stronger":[114],"pairs":[116],"never":[121],"invoked":[122],"at":[123],"inference":[124],"time.":[125],"On":[126],"TSP":[127],"CVRP,":[129],"achieves":[131],"strongest":[133],"overall":[134],"performance":[135],"among":[136],"compared":[138],"neural":[139],"baselines":[140],"while":[141],"also":[142],"delivering":[143],"clear":[144],"advantages":[145],"usage":[148],"throughput.":[150],"provide":[152],"additional":[153],"analysis":[154],"scaling,":[157],"throughput,":[158],"contribution":[161],"each":[163],"design":[164],"component.":[165]},"counts_by_year":[],"updated_date":"2026-04-30T06:05:26.967640","created_date":"2026-02-26T00:00:00"}
