{"id":"https://openalex.org/W2992712483","doi":"https://doi.org/10.1109/tcds.2019.2957831","title":"An Efficient Unified Approach Using Demonstrations for Inverse Reinforcement Learning","display_name":"An Efficient Unified Approach Using Demonstrations for Inverse Reinforcement Learning","publication_year":2019,"publication_date":"2019-12-05","ids":{"openalex":"https://openalex.org/W2992712483","doi":"https://doi.org/10.1109/tcds.2019.2957831","mag":"2992712483"},"language":"en","primary_location":{"id":"doi:10.1109/tcds.2019.2957831","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcds.2019.2957831","pdf_url":null,"source":{"id":"https://openalex.org/S2488537894","display_name":"IEEE Transactions on Cognitive and Developmental Systems","issn_l":"2379-8920","issn":["2379-8920","2379-8939"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cognitive and Developmental Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5066851929","display_name":"Maxwell Hwang","orcid":"https://orcid.org/0000-0002-9505-6263"},"institutions":[{"id":"https://openalex.org/I4210158318","display_name":"Second Affiliated Hospital of Zhejiang University","ror":"https://ror.org/059cjpv64","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I4210158318"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Maxwell Hwang","raw_affiliation_strings":["Cancer Institute, Key Laboratory of Cancer Prevention and Intervention, China National Ministry of Education, Second Affiliated Hospital of Zhejiang University School of Medicine, Hangzhou, China","Department of Colorectal Surgery, Second Affiliated Hospital of Zhejiang University School of Medicine, Hangzhou, China","Key Laboratory of Molecular Biology in Medical Sciences, Second Affiliated Hospital, Zhejiang University School of Medicine, Hangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Cancer Institute, Key Laboratory of Cancer Prevention and Intervention, China National Ministry of Education, Second Affiliated Hospital of Zhejiang University School of Medicine, Hangzhou, China","institution_ids":["https://openalex.org/I4210158318"]},{"raw_affiliation_string":"Department of Colorectal Surgery, Second Affiliated Hospital of Zhejiang University School of Medicine, Hangzhou, China","institution_ids":["https://openalex.org/I4210158318"]},{"raw_affiliation_string":"Key Laboratory of Molecular Biology in Medical Sciences, Second Affiliated Hospital, Zhejiang University School of Medicine, Hangzhou, China","institution_ids":["https://openalex.org/I4210158318"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001081733","display_name":"Wei\u2010Cheng Jiang","orcid":"https://orcid.org/0000-0003-4432-8801"},"institutions":[{"id":"https://openalex.org/I169090423","display_name":"Tunghai University","ror":"https://ror.org/00zhvdn11","country_code":"TW","type":"education","lineage":["https://openalex.org/I169090423"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Wei-Cheng Jiang","raw_affiliation_strings":["Department of Electrical Engineering, Tunghai University, Taichung, Taiwan"],"raw_orcid":"https://orcid.org/0000-0003-4432-8801","affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, Tunghai University, Taichung, Taiwan","institution_ids":["https://openalex.org/I169090423"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100731680","display_name":"Yu-Jen Chen","orcid":"https://orcid.org/0000-0003-0288-0796"},"institutions":[{"id":"https://openalex.org/I148099254","display_name":"National Chung Cheng University","ror":"https://ror.org/0028v3876","country_code":"TW","type":"education","lineage":["https://openalex.org/I148099254"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Yu-Jen Chen","raw_affiliation_strings":["Department of Electrical Engineering, National Chung Cheng University, Chiayi City, Taiwan"],"raw_orcid":"https://orcid.org/0000-0003-0288-0796","affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, National Chung Cheng University, Chiayi City, Taiwan","institution_ids":["https://openalex.org/I148099254"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061189209","display_name":"Kao\u2010Shing Hwang","orcid":"https://orcid.org/0000-0001-9234-4836"},"institutions":[{"id":"https://openalex.org/I142974352","display_name":"National Sun Yat-sen University","ror":"https://ror.org/00mjawt10","country_code":"TW","type":"education","lineage":["https://openalex.org/I142974352"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Kao-Shing Hwang","raw_affiliation_strings":["Department of Electrical Engineering, National Sun Yat-sen University, Kaohsiung, Taiwan"],"raw_orcid":"https://orcid.org/0000-0001-9234-4836","affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, National Sun Yat-sen University, Kaohsiung, Taiwan","institution_ids":["https://openalex.org/I142974352"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5112165524","display_name":"Yi-Chia Tseng","orcid":null},"institutions":[{"id":"https://openalex.org/I4210120917","display_name":"Taiwan Semiconductor Manufacturing Company (Taiwan)","ror":"https://ror.org/02wx79d08","country_code":"TW","type":"company","lineage":["https://openalex.org/I4210120917"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Yi-Chia Tseng","raw_affiliation_strings":["Taiwan Semiconductor Manufacturing Company, Hsinchu, Taiwan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Taiwan Semiconductor Manufacturing Company, Hsinchu, Taiwan","institution_ids":["https://openalex.org/I4210120917"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.1446,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.60318153,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"13","issue":"3","first_page":"444","last_page":"452"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9732999801635742,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8399896025657654},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.689640462398529},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.681942343711853},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5056018829345703},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.43011346459388733},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.35196399688720703}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8399896025657654},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.689640462398529},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.681942343711853},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5056018829345703},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.43011346459388733},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35196399688720703},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcds.2019.2957831","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcds.2019.2957831","pdf_url":null,"source":{"id":"https://openalex.org/S2488537894","display_name":"IEEE Transactions on Cognitive and Developmental Systems","issn_l":"2379-8920","issn":["2379-8920","2379-8939"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cognitive and Developmental Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7599999904632568,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W1540371141","https://openalex.org/W1557517019","https://openalex.org/W1975987482","https://openalex.org/W1986014385","https://openalex.org/W1999874108","https://openalex.org/W2061562262","https://openalex.org/W2062525454","https://openalex.org/W2098774185","https://openalex.org/W2102564968","https://openalex.org/W2102568724","https://openalex.org/W2102847492","https://openalex.org/W2105156548","https://openalex.org/W2105947986","https://openalex.org/W2121863487","https://openalex.org/W2122838776","https://openalex.org/W2134491302","https://openalex.org/W2142828048","https://openalex.org/W2146354794","https://openalex.org/W2149458524","https://openalex.org/W2154633587","https://openalex.org/W2398964619","https://openalex.org/W2530520848","https://openalex.org/W4214717370","https://openalex.org/W6632223008","https://openalex.org/W6674884181","https://openalex.org/W6675977238","https://openalex.org/W6712937674"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W3046775127","https://openalex.org/W3107602296","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W4313488044","https://openalex.org/W3209574120","https://openalex.org/W4312192474","https://openalex.org/W4210805261"],"abstract_inverted_index":{"A":[0],"reinforcement":[1],"learning":[2,223,227],"(RF)":[3],"agent":[4],"is":[5,27,47],"always":[6],"equipped":[7],"with":[8,22,100,175,224],"a":[9,31,56,131,145,189],"designed":[10],"reward":[11,32,57,132,146,218],"function":[12,33,58,128,147,219],"to":[13,29,49,54,92,105,152,178,197,220],"correct":[14,93,124,155],"policies":[15],"for":[16,35],"optimal":[17],"decision":[18],"making":[19,162],"through":[20],"interactions":[21],"an":[23,51,115,216,225],"environment.":[24],"However,":[25,81],"it":[26],"difficult":[28],"design":[30,55],"appropriate":[34,217],"complex":[36],"RF":[37,45],"problems.":[38],"To":[39],"solve":[40],"this":[41],"difficulty,":[42],"the":[43,67,74,78,98,107,135,163,167,183,199,207,212],"inverse":[44],"(IRL)":[46],"introduced":[48],"provide":[50,70,173],"efficient":[52,226],"way":[53],"based":[59],"on":[60],"input":[61],"derived":[62],"from":[63,137,161],"knowledgeable":[64],"experts.":[65],"In":[66],"IRL,":[68],"experts":[69],"demonstrations":[71,84,156,172],"so":[72,95],"that":[73,97,110,148,206],"agents":[75,99,141,174],"can":[76,103,142,149,210],"imitate":[77],"behaviors":[79],"accordingly.":[80],"even":[82],"incorrect":[83,171],"have":[85],"merits,":[86],"some":[87,176],"of":[88,109,122,130,214],"which":[89,118],"are":[90,195],"similar":[91],"ones,":[94],"as":[96,166],"these":[101],"clues":[102,136],"endeavor":[104],"avoid":[106,179],"occurrence":[108],"behavior.":[111],"This":[112],"article":[113],"introduces":[114],"IRL":[116],"method":[117,209],"considers":[119],"two":[120,138],"types":[121],"demonstrations,":[123,140],"and":[125,157,191],"incorrect,":[126],"in":[127,182,229],"approximation":[129],"function.":[133],"Given":[134],"opposite":[139],"iteratively":[143],"approximate":[144],"guide":[150],"them":[151,160],"like":[153],"expert\u2019s":[154],"also,":[158],"prevent":[159],"same":[164],"mistakes":[165],"expert":[168],"did.":[169],"These":[170],"guidelines":[177],"erroneous":[180],"motions":[181],"initial":[184],"phase.":[185],"Two":[186],"simulated":[187],"tasks,":[188],"labyrinth":[190],"robot":[192],"soccer":[193],"games":[194],"conducted":[196],"validate":[198],"proposed":[200,208],"method.":[201],"The":[202],"simulation":[203],"results":[204],"show":[205],"achieve":[211],"objectives":[213],"generating":[215],"accomplish":[221],"apprentice":[222],"time":[228],"IRL.":[230]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2026-06-20T22:02:38.213706","created_date":"2025-10-10T00:00:00"}
