{"id":"https://openalex.org/W3201418760","doi":"https://doi.org/10.1109/ijcnn52387.2021.9534010","title":"Learning to Optimise Routing Problems using Policy Optimisation","display_name":"Learning to Optimise Routing Problems using Policy Optimisation","publication_year":2021,"publication_date":"2021-07-18","ids":{"openalex":"https://openalex.org/W3201418760","doi":"https://doi.org/10.1109/ijcnn52387.2021.9534010","mag":"3201418760"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn52387.2021.9534010","is_oa":true,"landing_page_url":"https://doi.org/10.1109/ijcnn52387.2021.9534010","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1109/ijcnn52387.2021.9534010","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5019898753","display_name":"Nasrin Sultana","orcid":"https://orcid.org/0000-0002-2919-238X"},"institutions":[{"id":"https://openalex.org/I4210095297","display_name":"MIT University","ror":"https://ror.org/00v140q16","country_code":"MK","type":"education","lineage":["https://openalex.org/I4210095297"]},{"id":"https://openalex.org/I82951845","display_name":"RMIT University","ror":"https://ror.org/04ttjf776","country_code":"AU","type":"education","lineage":["https://openalex.org/I82951845"]}],"countries":["AU","MK"],"is_corresponding":true,"raw_author_name":"Nasrin Sultana","raw_affiliation_strings":["RMIT University, AU,School of Computing Technologies","School of Computing Technologies, RMIT University, AU"],"affiliations":[{"raw_affiliation_string":"RMIT University, AU,School of Computing Technologies","institution_ids":["https://openalex.org/I4210095297","https://openalex.org/I82951845"]},{"raw_affiliation_string":"School of Computing Technologies, RMIT University, AU","institution_ids":["https://openalex.org/I4210095297","https://openalex.org/I82951845"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071422010","display_name":"Jeffrey Chan","orcid":"https://orcid.org/0000-0002-7865-072X"},"institutions":[{"id":"https://openalex.org/I4210095297","display_name":"MIT University","ror":"https://ror.org/00v140q16","country_code":"MK","type":"education","lineage":["https://openalex.org/I4210095297"]},{"id":"https://openalex.org/I82951845","display_name":"RMIT University","ror":"https://ror.org/04ttjf776","country_code":"AU","type":"education","lineage":["https://openalex.org/I82951845"]}],"countries":["AU","MK"],"is_corresponding":false,"raw_author_name":"Jeffrey Chan","raw_affiliation_strings":["RMIT University, AU,School of Computing Technologies","School of Computing Technologies, RMIT University, AU"],"affiliations":[{"raw_affiliation_string":"RMIT University, AU,School of Computing Technologies","institution_ids":["https://openalex.org/I4210095297","https://openalex.org/I82951845"]},{"raw_affiliation_string":"School of Computing Technologies, RMIT University, AU","institution_ids":["https://openalex.org/I4210095297","https://openalex.org/I82951845"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033512312","display_name":"Tabinda Sarwar","orcid":"https://orcid.org/0000-0001-7313-5350"},"institutions":[{"id":"https://openalex.org/I4210095297","display_name":"MIT University","ror":"https://ror.org/00v140q16","country_code":"MK","type":"education","lineage":["https://openalex.org/I4210095297"]},{"id":"https://openalex.org/I82951845","display_name":"RMIT University","ror":"https://ror.org/04ttjf776","country_code":"AU","type":"education","lineage":["https://openalex.org/I82951845"]}],"countries":["AU","MK"],"is_corresponding":false,"raw_author_name":"Tabinda Sarwar","raw_affiliation_strings":["RMIT University, AU,School of Computing Technologies","School of Computing Technologies, RMIT University, AU"],"affiliations":[{"raw_affiliation_string":"RMIT University, AU,School of Computing Technologies","institution_ids":["https://openalex.org/I4210095297","https://openalex.org/I82951845"]},{"raw_affiliation_string":"School of Computing Technologies, RMIT University, AU","institution_ids":["https://openalex.org/I4210095297","https://openalex.org/I82951845"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5006614329","display_name":"A. K. Qin","orcid":"https://orcid.org/0000-0001-6631-1651"},"institutions":[{"id":"https://openalex.org/I97750245","display_name":"Software (Spain)","ror":"https://ror.org/02ethns06","country_code":"ES","type":"company","lineage":["https://openalex.org/I4210087817","https://openalex.org/I97750245"]},{"id":"https://openalex.org/I57093077","display_name":"Swinburne University of Technology","ror":"https://ror.org/031rekg67","country_code":"AU","type":"education","lineage":["https://openalex.org/I57093077"]}],"countries":["AU","ES"],"is_corresponding":false,"raw_author_name":"A. K. Qin","raw_affiliation_strings":["Swinburne University of Technology, AU,School of Software and Electrical Engineering","School of Software and Electrical Engineering, Swinburne University of Technology, AU"],"affiliations":[{"raw_affiliation_string":"Swinburne University of Technology, AU,School of Software and Electrical Engineering","institution_ids":["https://openalex.org/I57093077"]},{"raw_affiliation_string":"School of Software and Electrical Engineering, Swinburne University of Technology, AU","institution_ids":["https://openalex.org/I57093077","https://openalex.org/I97750245"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5019898753"],"corresponding_institution_ids":["https://openalex.org/I4210095297","https://openalex.org/I82951845"],"apc_list":null,"apc_paid":null,"fwci":1.0539,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.81180861,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10567","display_name":"Vehicle Routing Optimization Methods","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10567","display_name":"Vehicle Routing Optimization Methods","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9818000197410583,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11942","display_name":"Transportation and Mobility Innovations","score":0.9761999845504761,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8762052059173584},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7094305753707886},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.7051997184753418},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.6464635133743286},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.6052833199501038},{"id":"https://openalex.org/keywords/vehicle-routing-problem","display_name":"Vehicle routing problem","score":0.5704925656318665},{"id":"https://openalex.org/keywords/travelling-salesman-problem","display_name":"Travelling salesman problem","score":0.5017538070678711},{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.501488208770752},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.47325828671455383},{"id":"https://openalex.org/keywords/routing","display_name":"Routing (electronic design automation)","score":0.47099798917770386},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.46903619170188904},{"id":"https://openalex.org/keywords/cross-entropy-method","display_name":"Cross-entropy method","score":0.4404098093509674},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3523401916027069},{"id":"https://openalex.org/keywords/combinatorial-optimization","display_name":"Combinatorial optimization","score":0.2851504981517792},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.1801626980304718},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.15813779830932617}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8762052059173584},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7094305753707886},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.7051997184753418},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.6464635133743286},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.6052833199501038},{"id":"https://openalex.org/C123784306","wikidata":"https://www.wikidata.org/wiki/Q944041","display_name":"Vehicle routing problem","level":3,"score":0.5704925656318665},{"id":"https://openalex.org/C175859090","wikidata":"https://www.wikidata.org/wiki/Q322212","display_name":"Travelling salesman problem","level":2,"score":0.5017538070678711},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.501488208770752},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.47325828671455383},{"id":"https://openalex.org/C74172769","wikidata":"https://www.wikidata.org/wiki/Q1446839","display_name":"Routing (electronic design automation)","level":2,"score":0.47099798917770386},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.46903619170188904},{"id":"https://openalex.org/C75782508","wikidata":"https://www.wikidata.org/wiki/Q3333633","display_name":"Cross-entropy method","level":4,"score":0.4404098093509674},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3523401916027069},{"id":"https://openalex.org/C52692508","wikidata":"https://www.wikidata.org/wiki/Q1333872","display_name":"Combinatorial optimization","level":2,"score":0.2851504981517792},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.1801626980304718},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.15813779830932617},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C98036226","wikidata":"https://www.wikidata.org/wiki/Q7268356","display_name":"Quadratic assignment problem","level":3,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/ijcnn52387.2021.9534010","is_oa":true,"landing_page_url":"https://doi.org/10.1109/ijcnn52387.2021.9534010","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},{"id":"pmh:oai:alma61RMIT.INST:11289048360001341","is_oa":false,"landing_page_url":"https://doi.org/10.1109/IJCNN52387.2021.9534010","pdf_url":null,"source":{"id":"https://openalex.org/S4306402074","display_name":"RMIT Research Repository (RMIT University Library)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I82951845","host_organization_name":"RMIT University","host_organization_lineage":["https://openalex.org/I82951845"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"pmh:oai:figshare.com:article/27591585","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4377196282","display_name":"Figshare","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210132348","host_organization_name":"Figshare (United Kingdom)","host_organization_lineage":["https://openalex.org/I4210132348"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1109/ijcnn52387.2021.9534010","is_oa":true,"landing_page_url":"https://doi.org/10.1109/ijcnn52387.2021.9534010","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":49,"referenced_works":["https://openalex.org/W64088143","https://openalex.org/W626292722","https://openalex.org/W1522301498","https://openalex.org/W1668207500","https://openalex.org/W2039906176","https://openalex.org/W2098774185","https://openalex.org/W2106378689","https://openalex.org/W2119717200","https://openalex.org/W2130942839","https://openalex.org/W2155027007","https://openalex.org/W2594103415","https://openalex.org/W2607264901","https://openalex.org/W2790770538","https://openalex.org/W2805798351","https://openalex.org/W2903820428","https://openalex.org/W2944956154","https://openalex.org/W2948433391","https://openalex.org/W2949561945","https://openalex.org/W2952332632","https://openalex.org/W2962821147","https://openalex.org/W2962979969","https://openalex.org/W2964121744","https://openalex.org/W2970706905","https://openalex.org/W2975306946","https://openalex.org/W2987288550","https://openalex.org/W2996246179","https://openalex.org/W3093945975","https://openalex.org/W3097232108","https://openalex.org/W3098134815","https://openalex.org/W3116111955","https://openalex.org/W4287548833","https://openalex.org/W4295138992","https://openalex.org/W6631190155","https://openalex.org/W6674884181","https://openalex.org/W6679436768","https://openalex.org/W6683204974","https://openalex.org/W6725207838","https://openalex.org/W6730742100","https://openalex.org/W6734517396","https://openalex.org/W6748487558","https://openalex.org/W6748833949","https://openalex.org/W6756287877","https://openalex.org/W6762840543","https://openalex.org/W6763838503","https://openalex.org/W6769975600","https://openalex.org/W6771561114","https://openalex.org/W6784870226","https://openalex.org/W6785065334","https://openalex.org/W6787476040"],"related_works":["https://openalex.org/W4281906162","https://openalex.org/W3122244527","https://openalex.org/W1985012061","https://openalex.org/W4312306433","https://openalex.org/W2974874797","https://openalex.org/W4252122873","https://openalex.org/W2125009529","https://openalex.org/W3136908367","https://openalex.org/W2115347385","https://openalex.org/W3185606095"],"abstract_inverted_index":{"Deep":[0],"reinforcement":[1,23],"learning":[2,24],"(DRL)":[3],"has":[4],"demonstrated":[5],"promising":[6],"performance":[7,42],"to":[8,12,35,96,99,110],"learn":[9],"effective":[10],"heuristics":[11],"solve":[13],"complex":[14],"combinatorial":[15],"optimisation":[16],"problems":[17,169],"via":[18],"policy":[19,77,86,89],"networks.":[20],"However,":[21],"traditional":[22],"(RL)":[25],"suffers":[26],"from":[27],"insufficient":[28],"exploration,":[29],"which":[30],"often":[31],"results":[32,172],"in":[33,184],"pre-convergence":[34,109],"poor":[36],"policies":[37,112],"and":[38,163,181],"many":[39],"challenges":[40],"the":[41,76,81,85,101,106,118,130,136,175,189],"of":[43,84,108,132,150],"DRL.":[44],"To":[45],"prevent":[46],"this,":[47],"we":[48],"propose":[49],"an":[50,71],"Entropy":[51],"Regularised":[52],"Reinforcement":[53],"Learning":[54],"(ERRL)":[55],"method":[56,69,120,177],"that":[57,174],"supports":[58],"exploration":[59,90],"by":[60],"providing":[61],"more":[62],"stochastic":[63],"policies,":[64],"improving":[65],"optimisation.":[66],"The":[67],"ERRL":[68,119],"incorporates":[70],"entropy":[72],"term,":[73],"defined":[74],"over":[75],"network's":[78],"outputs,":[79],"into":[80],"loss":[82],"function":[83],"network.":[87],"Hence,":[88],"can":[91,113,178],"be":[92,114],"explicitly":[93],"advocated":[94],"subjected":[95],"a":[97,104],"balance":[98],"maximise":[100],"reward.":[102],"As":[103],"result,":[105],"risk":[107],"inferior":[111],"reduced.":[115],"We":[116,127],"implement":[117],"based":[121],"on":[122],"two":[123,137],"existing":[124],"DRL":[125,138],"algorithms.":[126,191],"have":[128],"compared":[129],"performances":[131],"our":[133],"implementations":[134],"with":[135,141,166],"algorithms":[139],"along":[140],"several":[142],"state-of-the-art":[143,190],"heuristic-based":[144],"non-RL":[145],"approaches":[146],"for":[147],"three":[148],"categories":[149],"routing":[151,160,165],"problems,":[152],"i.e.,":[153],"travelling":[154],"salesman":[155],"problem":[156,161],"(TSP),":[157],"capacitated":[158],"vehicle":[159],"(CVRP)":[162],"multiple":[164],"fixed":[167],"fleet":[168],"(MRPFF).":[170],"Experimental":[171],"show":[173],"proposed":[176],"find":[179],"better":[180],"faster":[182],"solutions":[183],"most":[185],"test":[186],"cases":[187],"than":[188]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
