{"id":"https://openalex.org/W3010746488","doi":"https://doi.org/10.1017/s0890060420000141","title":"Reinforcement learning-based collision avoidance: impact of reward function and knowledge transfer","display_name":"Reinforcement learning-based collision avoidance: impact of reward function and knowledge transfer","publication_year":2020,"publication_date":"2020-03-16","ids":{"openalex":"https://openalex.org/W3010746488","doi":"https://doi.org/10.1017/s0890060420000141","mag":"3010746488"},"language":"en","primary_location":{"id":"doi:10.1017/s0890060420000141","is_oa":false,"landing_page_url":"https://doi.org/10.1017/s0890060420000141","pdf_url":null,"source":{"id":"https://openalex.org/S4210193102","display_name":"Artificial intelligence for engineering design analysis and manufacturing","issn_l":"0890-0604","issn":["0890-0604","1469-1760"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Artificial Intelligence for Engineering Design, Analysis and Manufacturing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088950956","display_name":"Xiongqing Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiongqing Liu","raw_affiliation_strings":["Department of Aerospace and Mechanical Engineering, University of Southern California, 3650 McClintock Avenue, OHE-430, Los Angeles, CA90089-1453, USA"],"affiliations":[{"raw_affiliation_string":"Department of Aerospace and Mechanical Engineering, University of Southern California, 3650 McClintock Avenue, OHE-430, Los Angeles, CA90089-1453, USA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023969203","display_name":"Yan Jin","orcid":"https://orcid.org/0000-0002-6502-5837"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yan Jin","raw_affiliation_strings":["Department of Aerospace and Mechanical Engineering, University of Southern California, 3650 McClintock Avenue, OHE-430, Los Angeles, CA90089-1453, USA"],"affiliations":[{"raw_affiliation_string":"Department of Aerospace and Mechanical Engineering, University of Southern California, 3650 McClintock Avenue, OHE-430, Los Angeles, CA90089-1453, USA","institution_ids":["https://openalex.org/I1174212"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5023969203"],"corresponding_institution_ids":["https://openalex.org/I1174212"],"apc_list":null,"apc_paid":null,"fwci":1.3256,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.84397496,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":"34","issue":"2","first_page":"207","last_page":"222"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.9898999929428101,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11622","display_name":"Maritime Navigation and Safety","score":0.9818000197410583,"subfield":{"id":"https://openalex.org/subfields/2212","display_name":"Ocean Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8567616939544678},{"id":"https://openalex.org/keywords/collision-avoidance","display_name":"Collision avoidance","score":0.7289260625839233},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7248172163963318},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6123316287994385},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5283344984054565},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.5134023427963257},{"id":"https://openalex.org/keywords/collision","display_name":"Collision","score":0.498687744140625},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4985053539276123},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.485723078250885},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.4579415023326874},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.39671576023101807},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.32458382844924927},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.14276573061943054},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.07951691746711731}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8567616939544678},{"id":"https://openalex.org/C2780864053","wikidata":"https://www.wikidata.org/wiki/Q5147495","display_name":"Collision avoidance","level":3,"score":0.7289260625839233},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7248172163963318},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6123316287994385},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5283344984054565},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.5134023427963257},{"id":"https://openalex.org/C121704057","wikidata":"https://www.wikidata.org/wiki/Q352070","display_name":"Collision","level":2,"score":0.498687744140625},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4985053539276123},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.485723078250885},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.4579415023326874},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.39671576023101807},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32458382844924927},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.14276573061943054},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.07951691746711731},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1017/s0890060420000141","is_oa":false,"landing_page_url":"https://doi.org/10.1017/s0890060420000141","pdf_url":null,"source":{"id":"https://openalex.org/S4210193102","display_name":"Artificial intelligence for engineering design analysis and manufacturing","issn_l":"0890-0604","issn":["0890-0604","1469-1760"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Artificial Intelligence for Engineering Design, Analysis and Manufacturing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":59,"referenced_works":["https://openalex.org/W41554520","https://openalex.org/W178419168","https://openalex.org/W1487299580","https://openalex.org/W1506146479","https://openalex.org/W1507591516","https://openalex.org/W1527702126","https://openalex.org/W1576452626","https://openalex.org/W1583837637","https://openalex.org/W1589549454","https://openalex.org/W1757796397","https://openalex.org/W1821462560","https://openalex.org/W2017325967","https://openalex.org/W2024388749","https://openalex.org/W2031727428","https://openalex.org/W2047985315","https://openalex.org/W2059652044","https://openalex.org/W2060553467","https://openalex.org/W2061562262","https://openalex.org/W2073384958","https://openalex.org/W2076450475","https://openalex.org/W2078488070","https://openalex.org/W2088173505","https://openalex.org/W2089080976","https://openalex.org/W2103120971","https://openalex.org/W2121863487","https://openalex.org/W2157762168","https://openalex.org/W2162390675","https://openalex.org/W2164114810","https://openalex.org/W2165698076","https://openalex.org/W2168231600","https://openalex.org/W2168904841","https://openalex.org/W2173564293","https://openalex.org/W2174786457","https://openalex.org/W2201581102","https://openalex.org/W2253807446","https://openalex.org/W2257979135","https://openalex.org/W2277461844","https://openalex.org/W2339945684","https://openalex.org/W2397746618","https://openalex.org/W2404399993","https://openalex.org/W2415570156","https://openalex.org/W2463627759","https://openalex.org/W2467923710","https://openalex.org/W2472587927","https://openalex.org/W2485267956","https://openalex.org/W2528485485","https://openalex.org/W2531360032","https://openalex.org/W2618530766","https://openalex.org/W2746553466","https://openalex.org/W2766447205","https://openalex.org/W2899403804","https://openalex.org/W2919115771","https://openalex.org/W2952523895","https://openalex.org/W2963809389","https://openalex.org/W2964121744","https://openalex.org/W4205326910","https://openalex.org/W4211221179","https://openalex.org/W4237637693","https://openalex.org/W4245279042"],"related_works":["https://openalex.org/W2889566344","https://openalex.org/W4317634134","https://openalex.org/W2981729160","https://openalex.org/W2743212448","https://openalex.org/W4306904969","https://openalex.org/W4310743282","https://openalex.org/W1819938260","https://openalex.org/W2340892746","https://openalex.org/W3005999311","https://openalex.org/W3042530408"],"abstract_inverted_index":{"Abstract":[0],"Collision":[1],"avoidance":[2,79],"for":[3,20,182],"robots":[4,24],"and":[5,34,42,148,176,209,231],"vehicles":[6],"in":[7,140,205,214],"unpredictable":[8],"environments":[9],"is":[10,98,103,138,196],"a":[11,54,94,101,178,206],"challenging":[12],"task.":[13],"Various":[14],"control":[15],"strategies":[16],"have":[17,192],"been":[18],"developed":[19],"the":[21,29,32,36,87,116,166,200,210,226,229,232],"agent":[22,170,189],"(i.e.,":[23],"or":[25,132],"vehicles)":[26],"to":[27,39,58,76,92,109,154,198],"sense":[28],"environment,":[30],"assess":[31],"situation,":[33],"select":[35],"optimal":[37],"actions":[38],"avoid":[40],"collision":[41,59,78],"accomplish":[43],"its":[44],"mission.":[45],"In":[46,161],"our":[47],"research":[48],"on":[49,225],"autonomous":[50],"ships,":[51],"we":[52,164],"take":[53],"machine":[55],"learning":[56,83,136],"approach":[57],"avoidance.":[60],"The":[61,186,217],"lack":[62],"of":[63,68,168,202,219,228],"available":[64,104],"ship":[65,70],"steering":[66],"data":[67],"human":[69],"masters":[71],"has":[72],"made":[73],"it":[74,97,195],"necessary":[75],"acquire":[77],"knowledge":[80,118,153,184],"through":[81,172],"reinforcement":[82],"(RL).":[84],"Given":[85],"that":[86,100,115,194],"learned":[88,152],"neural":[89],"network":[90],"tends":[91],"be":[93,107,120,128],"black":[95],"box,":[96],"desirable":[99],"method":[102,137,181],"which":[105,141],"can":[106,119,127,143],"used":[108],"design":[110,212],"an":[111],"agent's":[112],"behavior":[113],"so":[114],"desired":[117],"captured.":[121],"Furthermore,":[122],"RL":[123,180],"with":[124],"complex":[125,159],"tasks":[126,147,230],"either":[129],"time":[130],"consuming":[131],"unfeasible.":[133],"A":[134],"multi-stage":[135,183],"needed":[139],"agents":[142],"learn":[144],"from":[145],"simple":[146],"then":[149],"transfer":[150,179,215],"their":[151],"closely":[155],"related":[156],"but":[157],"more":[158],"tasks.":[160],"this":[162],"paper,":[163],"explore":[165],"ways":[167],"designing":[169],"behaviors":[171],"tuning":[173],"reward":[174,207],"functions":[175],"devise":[177],"acquisition.":[185],"computer":[187],"simulation-based":[188],"training":[190],"results":[191],"shown":[193],"important":[197],"understand":[199],"roles":[201],"each":[203],"component":[204],"function":[208],"various":[211],"parameters":[213,221],"RL.":[216],"settings":[218],"these":[220],"are":[222],"all":[223],"dependent":[224],"complexity":[227],"similarities":[233],"between":[234],"them.":[235]},"counts_by_year":[{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}