{"id":"https://openalex.org/W4392007398","doi":"https://doi.org/10.1109/tase.2024.3367237","title":"An End-to-End Deep Reinforcement Learning Based Modular Task Allocation Framework for Autonomous Mobile Systems","display_name":"An End-to-End Deep Reinforcement Learning Based Modular Task Allocation Framework for Autonomous Mobile Systems","publication_year":2024,"publication_date":"2024-02-21","ids":{"openalex":"https://openalex.org/W4392007398","doi":"https://doi.org/10.1109/tase.2024.3367237"},"language":"en","primary_location":{"id":"doi:10.1109/tase.2024.3367237","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tase.2024.3367237","pdf_url":null,"source":{"id":"https://openalex.org/S34881539","display_name":"IEEE Transactions on Automation Science and Engineering","issn_l":"1545-5955","issn":["1545-5955","1558-3783"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automation Science and Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://kclpure.kcl.ac.uk/portal/en/publications/9c0ce27e-4ece-422e-bdcd-c68581162d83","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100608193","display_name":"Song Ma","orcid":"https://orcid.org/0000-0003-4820-4857"},"institutions":[{"id":"https://openalex.org/I45129253","display_name":"University College London","ror":"https://ror.org/02jx3x895","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I45129253"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Song Ma","raw_affiliation_strings":["Department of Mechanical Engineering, University College London, London, U.K"],"affiliations":[{"raw_affiliation_string":"Department of Mechanical Engineering, University College London, London, U.K","institution_ids":["https://openalex.org/I45129253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076546255","display_name":"Jingqing Ruan","orcid":"https://orcid.org/0000-0002-4857-9053"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210094879","display_name":"Shandong Institute of Automation","ror":"https://ror.org/00qdtba35","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210094879","https://openalex.org/I4210142748"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingqing Ruan","raw_affiliation_strings":["Institute of Automation, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210094879","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100957930","display_name":"Yali Du","orcid":"https://orcid.org/0000-0001-7759-3906"},"institutions":[{"id":"https://openalex.org/I183935753","display_name":"King's College London","ror":"https://ror.org/0220mzb33","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I183935753"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Yali Du","raw_affiliation_strings":["Department of Informatics, King's College London, London, U.K","Department of Informatics, King&#x2019;s College London, London, U.K"],"affiliations":[{"raw_affiliation_string":"Department of Informatics, King's College London, London, U.K","institution_ids":["https://openalex.org/I183935753"]},{"raw_affiliation_string":"Department of Informatics, King&#x2019;s College London, London, U.K","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089888737","display_name":"Richard Bucknall","orcid":"https://orcid.org/0000-0003-2542-7273"},"institutions":[{"id":"https://openalex.org/I45129253","display_name":"University College London","ror":"https://ror.org/02jx3x895","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I45129253"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Richard Bucknall","raw_affiliation_strings":["Department of Mechanical Engineering, University College London, London, U.K"],"affiliations":[{"raw_affiliation_string":"Department of Mechanical Engineering, University College London, London, U.K","institution_ids":["https://openalex.org/I45129253"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079073545","display_name":"Yuanchang Liu","orcid":"https://orcid.org/0000-0001-9306-297X"},"institutions":[{"id":"https://openalex.org/I45129253","display_name":"University College London","ror":"https://ror.org/02jx3x895","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I45129253"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Yuanchang Liu","raw_affiliation_strings":["Department of Mechanical Engineering, University College London, London, U.K"],"affiliations":[{"raw_affiliation_string":"Department of Mechanical Engineering, University College London, London, U.K","institution_ids":["https://openalex.org/I45129253"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100608193"],"corresponding_institution_ids":["https://openalex.org/I45129253"],"apc_list":null,"apc_paid":null,"fwci":6.5243,"has_fulltext":true,"cited_by_count":19,"citation_normalized_percentile":{"value":0.96954815,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"22","issue":null,"first_page":"1519","last_page":"1533"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9465000033378601,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9465000033378601,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13382","display_name":"Robotics and Automated Systems","score":0.9046000242233276,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7023375034332275},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.678709089756012},{"id":"https://openalex.org/keywords/modular-design","display_name":"Modular design","score":0.6777598261833191},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6338703036308289},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6255719065666199},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.3869428336620331},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3581732511520386},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3259413540363312},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.30634403228759766},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3047066330909729},{"id":"https://openalex.org/keywords/systems-engineering","display_name":"Systems engineering","score":0.20290887355804443},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.19237172603607178}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7023375034332275},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.678709089756012},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.6777598261833191},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6338703036308289},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6255719065666199},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3869428336620331},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3581732511520386},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3259413540363312},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.30634403228759766},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3047066330909729},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.20290887355804443},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.19237172603607178}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/tase.2024.3367237","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tase.2024.3367237","pdf_url":null,"source":{"id":"https://openalex.org/S34881539","display_name":"IEEE Transactions on Automation Science and Engineering","issn_l":"1545-5955","issn":["1545-5955","1558-3783"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automation Science and Engineering","raw_type":"journal-article"},{"id":"pmh:oai:kclpure.kcl.ac.uk:publications/9c0ce27e-4ece-422e-bdcd-c68581162d83","is_oa":true,"landing_page_url":"https://kclpure.kcl.ac.uk/portal/en/publications/9c0ce27e-4ece-422e-bdcd-c68581162d83","pdf_url":null,"source":{"id":"https://openalex.org/S4306400216","display_name":"Research Portal (King's College London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I183935753","host_organization_name":"King's College London","host_organization_lineage":["https://openalex.org/I183935753"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Ma, S, Ruan, J, Du, Y, Bucknall, R & Liu, Y 2024, 'An End-to-End Deep Reinforcement Learning Based Modular Task Allocation Framework for Autonomous Mobile Systems', IEEE TRANSACTIONS ON AUTOMATION SCIENCE AND ENGINEERING, pp. 1-15. https://doi.org/10.1109/TASE.2024.3367237","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:eprints.ucl.ac.uk.OAI2:10187647","is_oa":true,"landing_page_url":"https://discovery.ucl.ac.uk/id/eprint/10187647/","pdf_url":"https://discovery.ucl.ac.uk/id/eprint/10187647/1/An_End-to-End_Deep_Reinforcement_Learning_Based_Modular_Task_Allocation_Framework_for_Autonomous_Mobile_Systems.pdf","source":{"id":"https://openalex.org/S4306400024","display_name":"UCL Discovery (University College London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I45129253","host_organization_name":"University College London","host_organization_lineage":["https://openalex.org/I45129253"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"   IEEE Transactions on Automation Science and Engineering     pp. 1-15.   (2024)     (In press).  ","raw_type":"Article"}],"best_oa_location":{"id":"pmh:oai:kclpure.kcl.ac.uk:publications/9c0ce27e-4ece-422e-bdcd-c68581162d83","is_oa":true,"landing_page_url":"https://kclpure.kcl.ac.uk/portal/en/publications/9c0ce27e-4ece-422e-bdcd-c68581162d83","pdf_url":null,"source":{"id":"https://openalex.org/S4306400216","display_name":"Research Portal (King's College London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I183935753","host_organization_name":"King's College London","host_organization_lineage":["https://openalex.org/I183935753"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Ma, S, Ruan, J, Du, Y, Bucknall, R & Liu, Y 2024, 'An End-to-End Deep Reinforcement Learning Based Modular Task Allocation Framework for Autonomous Mobile Systems', IEEE TRANSACTIONS ON AUTOMATION SCIENCE AND ENGINEERING, pp. 1-15. https://doi.org/10.1109/TASE.2024.3367237","raw_type":"info:eu-repo/semantics/publishedVersion"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4294668922","display_name":"Probabilistic Inference Based Utility Evaluation and Path Generation for Active Autonomous Exploration of USVs in Unknown Confined Marine Environments","funder_award_id":"EP/Y000862/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G4919394629","display_name":null,"funder_award_id":"IEC\\NSFC\\191633","funder_id":"https://openalex.org/F4320320006","funder_display_name":"Royal Society"},{"id":"https://openalex.org/G8855365585","display_name":null,"funder_award_id":"RGS\\R2\\212343","funder_id":"https://openalex.org/F4320320006","funder_display_name":"Royal Society"}],"funders":[{"id":"https://openalex.org/F4320320006","display_name":"Royal Society","ror":"https://ror.org/03wnrjx87"},{"id":"https://openalex.org/F4320322725","display_name":"China Scholarship Council","ror":"https://ror.org/04atp4p48"},{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":62,"referenced_works":["https://openalex.org/W1577509784","https://openalex.org/W1582224092","https://openalex.org/W1786686177","https://openalex.org/W1981245140","https://openalex.org/W2033320243","https://openalex.org/W2041970949","https://openalex.org/W2059810217","https://openalex.org/W2078619461","https://openalex.org/W2081230102","https://openalex.org/W2099034771","https://openalex.org/W2117211893","https://openalex.org/W2146444015","https://openalex.org/W2150884987","https://openalex.org/W2405827726","https://openalex.org/W2531521796","https://openalex.org/W2779455732","https://openalex.org/W2799899844","https://openalex.org/W2801036921","https://openalex.org/W2808797110","https://openalex.org/W2811066984","https://openalex.org/W2906282392","https://openalex.org/W2908084908","https://openalex.org/W2912261445","https://openalex.org/W2936516903","https://openalex.org/W2942394565","https://openalex.org/W2950752390","https://openalex.org/W2963220078","https://openalex.org/W2970645572","https://openalex.org/W2977190400","https://openalex.org/W3005764631","https://openalex.org/W3007469703","https://openalex.org/W3015712234","https://openalex.org/W3015726452","https://openalex.org/W3035965352","https://openalex.org/W3039235589","https://openalex.org/W3090939124","https://openalex.org/W3091127607","https://openalex.org/W3094206298","https://openalex.org/W3118630579","https://openalex.org/W3127305823","https://openalex.org/W3131664395","https://openalex.org/W3139666781","https://openalex.org/W3148561337","https://openalex.org/W3180008386","https://openalex.org/W3204996636","https://openalex.org/W3206064780","https://openalex.org/W4200340989","https://openalex.org/W4207024659","https://openalex.org/W4226197570","https://openalex.org/W4252571685","https://openalex.org/W4280571816","https://openalex.org/W4304142060","https://openalex.org/W4312565892","https://openalex.org/W6627195111","https://openalex.org/W6631190155","https://openalex.org/W6676023451","https://openalex.org/W6679974894","https://openalex.org/W6684205842","https://openalex.org/W6730742100","https://openalex.org/W6758687306","https://openalex.org/W6766978945","https://openalex.org/W6779032261"],"related_works":["https://openalex.org/W3179968364","https://openalex.org/W4306904969","https://openalex.org/W2138720691","https://openalex.org/W4362501864","https://openalex.org/W1999612375","https://openalex.org/W4380318855","https://openalex.org/W2031695474","https://openalex.org/W2024136090","https://openalex.org/W2586732548","https://openalex.org/W3049728571"],"abstract_inverted_index":{"Intelligent":[0],"decision-making":[1,74],"systems":[2,13,52],"that":[3,255,312],"can":[4,169,256],"solve":[5],"task":[6,45,80,97,112,139,270,317],"allocation":[7,46,90,98,140,271],"problems":[8],"are":[9,228,298,336],"critical":[10],"for":[11,48,82,172,293,309],"multi-robot":[12],"to":[14,42,53,60,102,147,189,200,234,251,326,370],"conduct":[15],"industrial":[16,340],"applications":[17,344],"in":[18,153,300,320,338],"a":[19,61,66,125,160,176,245,253,301,310,327],"collaborative":[20],"and":[21,55,217,226,347,351],"automated":[22],"way,":[23],"such":[24],"as":[25,71,124],"warehouse":[26,195,352],"inspection":[27,196],"using":[28,33,280],"mobile":[29,51,249],"robots,":[30],"hydrographic":[31],"surveying":[32],"unmanned":[34],"surface":[35],"vehicles,":[36],"etc.":[37],"This":[38,324],"paper,":[39],"therefore,":[40],"aims":[41],"address":[43],"the":[44,78,83,88,93,110,117,129,149,164,182,191,202,206,241,275,281,285,314,343,362,372,375],"problem":[47,67,107,121,134,242,331],"multi-agent":[49],"autonomous":[50,248],"autonomously":[54],"intelligently":[56],"allocate":[57],"multiple":[58,131,260,328],"tasks":[59],"fleet":[62,246],"of":[63,128,163,205,243,247,345],"robots.":[64,85],"Such":[65],"is":[68,100,122,187,198,238,267,367],"normally":[69],"regarded":[70],"an":[72,95,321],"independent":[73],"process":[75],"decoupled":[76],"from":[77],"following":[79],"planning":[81,113],"member":[84],"To":[86],"avoid":[87],"sub-optimal":[89],"caused":[91],"by":[92,240],"decoupling,":[94],"end-to-end":[96,138,264],"framework":[99,141,158,210,266],"proposed":[101,137,157,207],"tackle":[103,252],"this":[104],"combinatorial":[105],"optimisation":[106,118,308],"while":[108],"taking":[109],"succeeding":[111],"into":[114,259],"account":[115],"during":[116],"process.":[119],"The":[120,136,156,209,296],"formulated":[123],"special":[126],"variant":[127],"multi-depot":[130],"travelling":[132,329],"salesmen":[133,330],"(mTSP).":[135],"employs":[142],"deep":[143,286],"reinforcement":[144,165,282],"learning":[145,166,283],"methods":[146],"replace":[148],"handcrafted":[150],"heuristics":[151],"used":[152],"previous":[154],"works.":[155],"features":[159],"modular":[161,265],"design":[162],"agent":[167],"which":[168,304],"be":[170,257],"customised":[171],"various":[173,221],"applications.":[174],"Moreover,":[175],"real-robot":[177,218],"implementation":[178,358],"setup":[179],"based":[180,273,360],"on":[181,274,361],"Robot":[183,363],"Operating":[184,364],"System":[185,365],"2":[186,366],"presented":[188,369],"fulfil":[190],"simulation-to-reality":[192],"gap.":[193],"A":[194,356],"mission":[197,254],"executed":[199],"validate":[201],"training":[203],"outcome":[204],"framework.":[208,376],"has":[211],"been":[212],"cross-validated":[213],"via":[214],"both":[215],"simulated":[216],"tests":[219],"with":[220],"parameter":[222],"settings,":[223],"where":[224],"adaptability":[225],"performance":[227],"well":[229],"demonstrated.":[230],"<italic":[231],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[232],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">Note":[233],"Practitioners</i>":[235],"\u2014This":[236],"paper":[237],"motivated":[239],"dispatching":[244],"robots":[250],"resolved":[258],"waypoint-following":[261],"tasks.":[262,295],"An":[263],"proposed,":[268],"making":[269],"decisions":[272],"given":[276],"waypoint":[277],"information.":[278],"By":[279],"technique,":[284],"neural":[287],"network":[288],"could":[289],"learn":[290],"sophisticated":[291],"policies":[292,297,335],"allocating":[294],"trained":[299],"specific":[302],"pattern":[303],"ensures":[305],"their":[306],"joint":[307],"solver":[311],"outputs":[313],"near":[315],"optimal":[316],"execution":[318],"sequences":[319],"efficient":[322],"way.":[323],"leads":[325],"(mTSP)":[332],"solution.":[333],"Pre-trained":[334],"tested":[337],"several":[339],"scenarios":[341],"reflecting":[342],"search":[346],"rescue,":[348],"maritime":[349],"surveying,":[350],"automation,":[353],"among":[354],"others.":[355],"hardware":[357],"configuration":[359],"also":[368],"support":[371],"practical":[373],"deployment":[374]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":14},{"year":2024,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
