{"id":"https://openalex.org/W4313140785","doi":"https://doi.org/10.1109/icpr56361.2022.9956121","title":"Learning to Drive Using Sparse Imitation Reinforcement Learning","display_name":"Learning to Drive Using Sparse Imitation Reinforcement Learning","publication_year":2022,"publication_date":"2022-08-21","ids":{"openalex":"https://openalex.org/W4313140785","doi":"https://doi.org/10.1109/icpr56361.2022.9956121"},"language":"en","primary_location":{"id":"doi:10.1109/icpr56361.2022.9956121","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icpr56361.2022.9956121","pdf_url":null,"source":{"id":"https://openalex.org/S4363607731","display_name":"2022 26th International Conference on Pattern Recognition (ICPR)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 26th International Conference on Pattern Recognition (ICPR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5110953473","display_name":"Yuci Han","orcid":null},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yuci Han","raw_affiliation_strings":["The Ohio State University,Photogrammetric Computer Vision Lab,Columbus,OH,43210"],"affiliations":[{"raw_affiliation_string":"The Ohio State University,Photogrammetric Computer Vision Lab,Columbus,OH,43210","institution_ids":["https://openalex.org/I52357470"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5008672128","display_name":"Alper Y\u0131lmaz","orcid":"https://orcid.org/0000-0003-0755-2628"},"institutions":[{"id":"https://openalex.org/I52357470","display_name":"The Ohio State University","ror":"https://ror.org/00rs6vg23","country_code":"US","type":"education","lineage":["https://openalex.org/I52357470"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alper Yilmaz","raw_affiliation_strings":["The Ohio State University,Photogrammetric Computer Vision Lab,Columbus,OH,43210"],"affiliations":[{"raw_affiliation_string":"The Ohio State University,Photogrammetric Computer Vision Lab,Columbus,OH,43210","institution_ids":["https://openalex.org/I52357470"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5110953473"],"corresponding_institution_ids":["https://openalex.org/I52357470"],"apc_list":null,"apc_paid":null,"fwci":0.4185,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.60168548,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"3736","last_page":"3742"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10586","display_name":"Robotic Path Planning Algorithms","score":0.9871000051498413,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8015425801277161},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7201870679855347},{"id":"https://openalex.org/keywords/imitation","display_name":"Imitation","score":0.6439365148544312},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.45760229229927063},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3367999792098999},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.12581726908683777},{"id":"https://openalex.org/keywords/neuroscience","display_name":"Neuroscience","score":0.04856690764427185}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8015425801277161},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7201870679855347},{"id":"https://openalex.org/C126388530","wikidata":"https://www.wikidata.org/wiki/Q1131737","display_name":"Imitation","level":2,"score":0.6439365148544312},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45760229229927063},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3367999792098999},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.12581726908683777},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.04856690764427185}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icpr56361.2022.9956121","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icpr56361.2022.9956121","pdf_url":null,"source":{"id":"https://openalex.org/S4363607731","display_name":"2022 26th International Conference on Pattern Recognition (ICPR)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 26th International Conference on Pattern Recognition (ICPR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/11","display_name":"Sustainable cities and communities","score":0.8100000023841858}],"awards":[],"funders":[{"id":"https://openalex.org/F4320338281","display_name":"Army Research Office","ror":"https://ror.org/05epdh915"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1569657508","https://openalex.org/W2037917209","https://openalex.org/W2167362547","https://openalex.org/W2194775991","https://openalex.org/W2781726626","https://openalex.org/W2962894046","https://openalex.org/W2964043796","https://openalex.org/W2967727187","https://openalex.org/W3009593063","https://openalex.org/W3034445502","https://openalex.org/W3034552332","https://openalex.org/W3034652687","https://openalex.org/W3098275351","https://openalex.org/W3172863135","https://openalex.org/W3175981141","https://openalex.org/W3193987867","https://openalex.org/W3198460218","https://openalex.org/W4214759451","https://openalex.org/W4287634727","https://openalex.org/W4295719664","https://openalex.org/W4299805595","https://openalex.org/W4362642573","https://openalex.org/W6634230617","https://openalex.org/W6692846177","https://openalex.org/W6745935785","https://openalex.org/W6747473740","https://openalex.org/W6752781648","https://openalex.org/W6772033386","https://openalex.org/W6784712800","https://openalex.org/W6785358402","https://openalex.org/W6799477656"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W3046775127","https://openalex.org/W3107602296","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4283697347"],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3,229],"propose":[4],"Sparse":[5],"Imitation":[6],"Reinforcement":[7],"Learning":[8],"(SIRL),":[9],"a":[10,50,75,109,131,160,206,220],"hybrid":[11],"end-to-end":[12],"control":[13],"policy":[14,26,124,128],"that":[15],"combines":[16],"the":[17,87,93,99,117,121,126,136,139,143,150,155,170,175,180,195,202,213,225,231,243,250,257],"sparse":[18,37,122,140,196,203],"expert":[19,38,123,141,204],"driving":[20,29,89,133,176,258],"knowledge":[21,186],"with":[22,242],"reinforcement":[23],"learning":[24,240],"(RL)":[25],"for":[27,56,86,235],"autonomous":[28],"(AD)":[30],"task":[31],"in":[32,219],"CARLA":[33,226],"simulation":[34],"environment.":[35,262],"The":[36,263],"is":[39,46,80,173,266],"designed":[40],"based":[41],"on":[42],"hand-crafted":[43],"rules":[44],"which":[45],"suboptimal":[47],"but":[48],"provides":[49,108],"risk-averse":[51,236],"strategy":[52,107,148],"by":[53,115],"enforcing":[54],"experience":[55],"critical":[57],"scenarios":[58],"such":[59],"as":[60],"pedestrian":[61],"and":[62,65,82,125,153,163,198,205,238],"vehicle":[63],"avoidance,":[64],"traffic":[66],"light":[67],"detection.":[68],"As":[69],"it":[70,183,190],"has":[71],"been":[72],"demonstrated,":[73],"training":[74,145,151,188],"RL":[76,127,156,208,245],"agent":[77,172],"from":[78,98,158],"scratch":[79],"data-inefficient":[81],"time":[83],"consuming":[84],"particularly":[85],"urban":[88,222],"task,":[90],"due":[91],"to":[92,111,129,255,260],"complexity":[94],"of":[95,102,120,138,215],"situations":[96],"stemming":[97],"vast":[100],"size":[101],"state":[103],"space.":[104],"Our":[105],"SIRL":[106,147,171,217,232,251],"solution":[110],"solve":[112],"these":[113],"problems":[114],"fusing":[116],"output":[118],"distribution":[119],"generate":[130],"composite":[132],"policy.":[134],"With":[135],"guidance":[137],"during":[142,187],"early":[144],"stage,":[146],"accelerates":[149],"process":[152],"keeps":[154,191],"exploration":[157,237],"causing":[159],"catastrophe":[161],"outcome,":[162],"ensures":[164],"safe":[165],"exploration.":[166],"To":[167],"some":[168],"extent,":[169],"imitating":[174],"expert\u2019s":[177],"behavior.":[178],"At":[179],"same":[181],"time,":[182],"continuously":[184],"gains":[185],"therefore":[189],"making":[192],"improvement":[193],"beyond":[194],"expert,":[197],"can":[199],"surpass":[200],"both":[201],"traditional":[207,244],"agent.":[209],"We":[210,247],"experimentally":[211],"validate":[212],"efficacy":[214],"proposed":[216],"approach":[218],"complex":[221],"scenario":[223],"within":[224],"simulator.":[227],"Besides,":[228],"compare":[230],"agent\u2019s":[233,252],"performance":[234],"high":[239],"efficiency":[241],"approach.":[246],"additionally":[248],"demonstrate":[249],"generalization":[253],"ability":[254],"transfer":[256],"skill":[259],"unseen":[261],"supplementary":[264],"material":[265],"available":[267],"at":[268],"https://superhan2611.github.io/.":[269]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2}],"updated_date":"2026-03-25T13:04:00.132906","created_date":"2025-10-10T00:00:00"}