{"id":"https://openalex.org/W4304194074","doi":"https://doi.org/10.48550/arxiv.2210.03475","title":"Winner Takes It All: Training Performant RL Populations for Combinatorial Optimization","display_name":"Winner Takes It All: Training Performant RL Populations for Combinatorial Optimization","publication_year":2022,"publication_date":"2022-10-07","ids":{"openalex":"https://openalex.org/W4304194074","doi":"https://doi.org/10.48550/arxiv.2210.03475"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2210.03475","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2210.03475","pdf_url":"https://arxiv.org/pdf/2210.03475","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2210.03475","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032092481","display_name":"Nathan Grinsztajn","orcid":"https://orcid.org/0000-0001-6817-5972"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Grinsztajn, Nathan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023452296","display_name":"Daniel Furelos-Blanco","orcid":"https://orcid.org/0000-0001-7461-1910"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Furelos-Blanco, Daniel","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052796575","display_name":"Thomas D. Barrett","orcid":"https://orcid.org/0000-0001-6241-3028"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Surana, Shikha","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Bonnet, Cl\u00e9ment","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bonnet, Cl\u00e9ment","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Barrett, Thomas D.","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Barrett, Thomas D.","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5032092481"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10567","display_name":"Vehicle Routing Optimization Methods","score":0.9918000102043152,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10567","display_name":"Vehicle Routing Optimization Methods","score":0.9918000102043152,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10100","display_name":"Metaheuristic Optimization Algorithms Research","score":0.9872999787330627,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11182","display_name":"Auction Theory and Applications","score":0.9866999983787537,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/knapsack-problem","display_name":"Knapsack problem","score":0.6694209575653076},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6553112268447876},{"id":"https://openalex.org/keywords/travelling-salesman-problem","display_name":"Travelling salesman problem","score":0.6208451986312866},{"id":"https://openalex.org/keywords/combinatorial-optimization","display_name":"Combinatorial optimization","score":0.5988050699234009},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5918271541595459},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5672524571418762},{"id":"https://openalex.org/keywords/population","display_name":"Population","score":0.5572783350944519},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.5462242960929871},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4739511013031006},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4699452221393585},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4488235414028168},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3608782887458801},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.32647544145584106},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1751050055027008},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.12998473644256592}],"concepts":[{"id":"https://openalex.org/C113138325","wikidata":"https://www.wikidata.org/wiki/Q864457","display_name":"Knapsack problem","level":2,"score":0.6694209575653076},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6553112268447876},{"id":"https://openalex.org/C175859090","wikidata":"https://www.wikidata.org/wiki/Q322212","display_name":"Travelling salesman problem","level":2,"score":0.6208451986312866},{"id":"https://openalex.org/C52692508","wikidata":"https://www.wikidata.org/wiki/Q1333872","display_name":"Combinatorial optimization","level":2,"score":0.5988050699234009},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5918271541595459},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5672524571418762},{"id":"https://openalex.org/C2908647359","wikidata":"https://www.wikidata.org/wiki/Q2625603","display_name":"Population","level":2,"score":0.5572783350944519},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.5462242960929871},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4739511013031006},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4699452221393585},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4488235414028168},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3608782887458801},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.32647544145584106},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1751050055027008},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.12998473644256592},{"id":"https://openalex.org/C149923435","wikidata":"https://www.wikidata.org/wiki/Q37732","display_name":"Demography","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2210.03475","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2210.03475","pdf_url":"https://arxiv.org/pdf/2210.03475","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2210.03475","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2210.03475","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2210.03475","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2210.03475","pdf_url":"https://arxiv.org/pdf/2210.03475","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2572756133","https://openalex.org/W1563568831","https://openalex.org/W2135149549","https://openalex.org/W4294358747","https://openalex.org/W1604040598","https://openalex.org/W2946024655","https://openalex.org/W2861856156","https://openalex.org/W4246202890","https://openalex.org/W2146364482","https://openalex.org/W2758988501"],"abstract_inverted_index":{"Applying":[0],"reinforcement":[1],"learning":[2,72],"(RL)":[3],"to":[4,25,29,42,60],"combinatorial":[5],"optimization":[6],"problems":[7,34],"is":[8,23],"attractive":[9],"as":[10],"it":[11,22],"removes":[12],"the":[13,69,118,121],"need":[14],"for":[15,68,96],"expert":[16],"knowledge":[17],"or":[18,104],"pre-solved":[19],"instances.":[20],"However,":[21],"unrealistic":[24],"expect":[26],"an":[27,111],"agent":[28],"solve":[30],"these":[31],"(often":[32],"NP-)hard":[33],"in":[35],"a":[36,73,92,102,128],"single":[37],"shot":[38],"at":[39,84,116],"inference":[40],"due":[41],"their":[43],"inherent":[44],"complexity.":[45],"Thus,":[46],"leading":[47],"approaches":[48],"often":[49],"implement":[50],"additional":[51],"search":[52,59],"strategies,":[53],"from":[54],"stochastic":[55],"sampling":[56],"and":[57,133,150],"beam":[58],"explicit":[61],"fine-tuning.":[62],"In":[63],"this":[64,87],"paper,":[65],"we":[66,89],"argue":[67],"benefits":[70],"of":[71,75,99,107,120,130],"population":[74],"complementary":[76,131],"policies,":[77,132],"which":[78],"can":[79],"be":[80],"simultaneously":[81],"rolled":[82],"out":[83],"inference.":[85],"To":[86],"end,":[88],"introduce":[90],"Poppy,":[91],"simple":[93],"training":[94],"procedure":[95],"populations.":[97],"Instead":[98],"relying":[100],"on":[101,138],"predefined":[103],"hand-crafted":[105],"notion":[106],"diversity,":[108],"Poppy":[109,126],"induces":[110],"unsupervised":[112],"specialization":[113],"targeted":[114],"solely":[115],"maximizing":[117],"performance":[119],"population.":[122],"We":[123],"show":[124],"that":[125],"produces":[127],"set":[129],"obtains":[134],"state-of-the-art":[135],"RL":[136],"results":[137],"four":[139],"popular":[140],"NP-hard":[141],"problems:":[142],"traveling":[143],"salesman,":[144],"capacitated":[145],"vehicle":[146],"routing,":[147],"0-1":[148],"knapsack,":[149],"job-shop":[151],"scheduling.":[152]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":2}],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2022-10-11T00:00:00"}
