{"id":"https://openalex.org/W3196896000","doi":"https://doi.org/10.1109/icra46639.2022.9811660","title":"OPIRL: Sample Efficient Off-Policy Inverse Reinforcement Learning via Distribution Matching","display_name":"OPIRL: Sample Efficient Off-Policy Inverse Reinforcement Learning via Distribution Matching","publication_year":2022,"publication_date":"2022-05-23","ids":{"openalex":"https://openalex.org/W3196896000","doi":"https://doi.org/10.1109/icra46639.2022.9811660","mag":"3196896000"},"language":"en","primary_location":{"id":"doi:10.1109/icra46639.2022.9811660","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra46639.2022.9811660","pdf_url":null,"source":{"id":"https://openalex.org/S4363607759","display_name":"2022 International Conference on Robotics and Automation (ICRA)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5070713466","display_name":"Hana Hoshino","orcid":null},"institutions":[{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Hana Hoshino","raw_affiliation_strings":["School of Computing, Tokyo Institute of Technology,Department of Computer Science,Japan","Department of Computer Science, School of Computing, Tokyo Institute of Technology, Japan"],"affiliations":[{"raw_affiliation_string":"School of Computing, Tokyo Institute of Technology,Department of Computer Science,Japan","institution_ids":["https://openalex.org/I114531698"]},{"raw_affiliation_string":"Department of Computer Science, School of Computing, Tokyo Institute of Technology, Japan","institution_ids":["https://openalex.org/I114531698"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064968116","display_name":"Kei Ota","orcid":"https://orcid.org/0000-0002-5744-3027"},"institutions":[{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]},{"id":"https://openalex.org/I4210133125","display_name":"Mitsubishi Electric (Japan)","ror":"https://ror.org/033y26782","country_code":"JP","type":"company","lineage":["https://openalex.org/I1306287861","https://openalex.org/I4210133125"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Kei Ota","raw_affiliation_strings":["School of Computing, Tokyo Institute of Technology,Department of Computer Science,Japan","Department of Computer Science, School of Computing, Tokyo Institute of Technology, Japan","Information Technology R&D Center, Mitsubishi Electric Corporation, Japan"],"affiliations":[{"raw_affiliation_string":"School of Computing, Tokyo Institute of Technology,Department of Computer Science,Japan","institution_ids":["https://openalex.org/I114531698"]},{"raw_affiliation_string":"Department of Computer Science, School of Computing, Tokyo Institute of Technology, Japan","institution_ids":["https://openalex.org/I114531698"]},{"raw_affiliation_string":"Information Technology R&D Center, Mitsubishi Electric Corporation, Japan","institution_ids":["https://openalex.org/I4210133125"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045505869","display_name":"Asako Kanezaki","orcid":"https://orcid.org/0000-0003-3217-1405"},"institutions":[{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Asako Kanezaki","raw_affiliation_strings":["School of Computing, Tokyo Institute of Technology,Department of Computer Science,Japan","Department of Computer Science, School of Computing, Tokyo Institute of Technology, Japan"],"affiliations":[{"raw_affiliation_string":"School of Computing, Tokyo Institute of Technology,Department of Computer Science,Japan","institution_ids":["https://openalex.org/I114531698"]},{"raw_affiliation_string":"Department of Computer Science, School of Computing, Tokyo Institute of Technology, Japan","institution_ids":["https://openalex.org/I114531698"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5025266920","display_name":"Rio Yokota","orcid":null},"institutions":[{"id":"https://openalex.org/I114531698","display_name":"Tokyo Institute of Technology","ror":"https://ror.org/0112mx960","country_code":"JP","type":"education","lineage":["https://openalex.org/I114531698"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Rio Yokota","raw_affiliation_strings":["Tokyo Institute of Technology,Global Scientific Information and Computing Center,Japan","Global Scientific Information and Computing Center, Tokyo Institute of Technology, Japan"],"affiliations":[{"raw_affiliation_string":"Tokyo Institute of Technology,Global Scientific Information and Computing Center,Japan","institution_ids":["https://openalex.org/I114531698"]},{"raw_affiliation_string":"Global Scientific Information and Computing Center, Tokyo Institute of Technology, Japan","institution_ids":["https://openalex.org/I114531698"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5070713466"],"corresponding_institution_ids":["https://openalex.org/I114531698"],"apc_list":null,"apc_paid":null,"fwci":0.7306,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.69359199,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"448","last_page":"454"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10409","display_name":"Fuel Cells and Related Materials","score":0.9890000224113464,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.929099977016449,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.798331618309021},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.7462800741195679},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7132980227470398},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.7108798027038574},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.6503280401229858},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.6345725059509277},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.49262553453445435},{"id":"https://openalex.org/keywords/inverse","display_name":"Inverse","score":0.47488921880722046},{"id":"https://openalex.org/keywords/distribution","display_name":"Distribution (mathematics)","score":0.4512284994125366},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4268854260444641},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.4047721028327942},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.39060455560684204},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.18260055780410767},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.08506903052330017}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.798331618309021},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.7462800741195679},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7132980227470398},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.7108798027038574},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.6503280401229858},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.6345725059509277},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.49262553453445435},{"id":"https://openalex.org/C207467116","wikidata":"https://www.wikidata.org/wiki/Q4385666","display_name":"Inverse","level":2,"score":0.47488921880722046},{"id":"https://openalex.org/C110121322","wikidata":"https://www.wikidata.org/wiki/Q865811","display_name":"Distribution (mathematics)","level":2,"score":0.4512284994125366},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4268854260444641},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4047721028327942},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.39060455560684204},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.18260055780410767},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.08506903052330017},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icra46639.2022.9811660","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra46639.2022.9811660","pdf_url":null,"source":{"id":"https://openalex.org/S4363607759","display_name":"2022 International Conference on Robotics and Automation (ICRA)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},{"id":"pmh:oai:t2r2.star.titech.ac.jp:50604711","is_oa":false,"landing_page_url":"http://t2r2.star.titech.ac.jp/cgi-bin/publicationinfo.cgi?q_publication_content_number=CTT100865536","pdf_url":null,"source":{"id":"https://openalex.org/S4377196385","display_name":"Tokyo Tech Research Repository (Tokyo Institute of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I114531698","host_organization_name":"Tokyo Institute of Technology","host_organization_lineage":["https://openalex.org/I114531698"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference Paper"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":78,"referenced_works":["https://openalex.org/W1665214252","https://openalex.org/W1999874108","https://openalex.org/W2031571562","https://openalex.org/W2061562262","https://openalex.org/W2098774185","https://openalex.org/W2099471712","https://openalex.org/W2141559645","https://openalex.org/W2158782408","https://openalex.org/W2166944917","https://openalex.org/W2290104316","https://openalex.org/W2342840547","https://openalex.org/W2410842990","https://openalex.org/W2462906003","https://openalex.org/W2594103415","https://openalex.org/W2736601468","https://openalex.org/W2765407302","https://openalex.org/W2781726626","https://openalex.org/W2890026535","https://openalex.org/W2890822044","https://openalex.org/W2911383979","https://openalex.org/W2946233467","https://openalex.org/W2949561945","https://openalex.org/W2952854274","https://openalex.org/W2953220522","https://openalex.org/W2962879692","https://openalex.org/W2963099939","https://openalex.org/W2963277051","https://openalex.org/W2963289505","https://openalex.org/W2963301010","https://openalex.org/W2963391602","https://openalex.org/W2963508354","https://openalex.org/W2963590100","https://openalex.org/W2963817681","https://openalex.org/W2963836885","https://openalex.org/W2964121744","https://openalex.org/W2970370999","https://openalex.org/W2993185773","https://openalex.org/W2994977742","https://openalex.org/W2995376179","https://openalex.org/W3003373508","https://openalex.org/W3023876387","https://openalex.org/W3028821797","https://openalex.org/W3037606473","https://openalex.org/W3038629022","https://openalex.org/W3088304681","https://openalex.org/W3102359080","https://openalex.org/W3114820637","https://openalex.org/W3133214370","https://openalex.org/W3133628959","https://openalex.org/W3150177537","https://openalex.org/W3172795535","https://openalex.org/W3175744234","https://openalex.org/W3210324628","https://openalex.org/W4287996125","https://openalex.org/W4320013936","https://openalex.org/W6674884181","https://openalex.org/W6696380822","https://openalex.org/W6704559304","https://openalex.org/W6718092244","https://openalex.org/W6718836005","https://openalex.org/W6734517396","https://openalex.org/W6741002519","https://openalex.org/W6745347688","https://openalex.org/W6747473740","https://openalex.org/W6754484421","https://openalex.org/W6758568160","https://openalex.org/W6760325452","https://openalex.org/W6763248972","https://openalex.org/W6767217570","https://openalex.org/W6769977606","https://openalex.org/W6771249860","https://openalex.org/W6771428195","https://openalex.org/W6772303632","https://openalex.org/W6777406302","https://openalex.org/W6782766965","https://openalex.org/W6785260215","https://openalex.org/W6785775855","https://openalex.org/W6797649464"],"related_works":["https://openalex.org/W4306904969","https://openalex.org/W3162204513","https://openalex.org/W2138720691","https://openalex.org/W4362501864","https://openalex.org/W4380318855","https://openalex.org/W2031695474","https://openalex.org/W2024136090","https://openalex.org/W4391331176","https://openalex.org/W2586732548","https://openalex.org/W3049728571"],"abstract_inverted_index":{"Inverse":[0,56],"Reinforcement":[1,57],"Learning":[2,58],"(IRL)":[3],"is":[4,87,109],"attractive":[5],"in":[6,38],"scenarios":[7],"where":[8,42,150],"reward":[9,84,144],"engineering":[10],"can":[11,45],"be":[12],"tedious.":[13],"However,":[14],"prior":[15,151],"IRL":[16,36],"algorithms":[17],"use":[18],"on-policy":[19,68],"transitions,":[20],"which":[21,60],"require":[22],"intensive":[23],"sampling":[24],"from":[25],"the":[26,39,74,79,120,142],"current":[27],"policy":[28,130],"for":[29,101],"stable":[30],"and":[31,69,96,114],"optimal":[32],"performance.":[33],"This":[34],"limits":[35],"applications":[37],"real":[40],"world,":[41],"environment":[43],"interactions":[44,77],"become":[46],"highly":[47],"expensive.":[48],"To":[49],"tackle":[50],"this":[51],"problem,":[52],"we":[53,138],"present":[54],"Off-Policy":[55],"(OPIRL),":[59],"(1)":[61],"adopts":[62],"off-policy":[63],"data":[64],"distribution":[65],"instead":[66],"of":[67,73,76],"enables":[70],"significant":[71],"reduction":[72],"number":[75],"with":[78,89,133],"environment,":[80],"(2)":[81],"learns":[82],"a":[83],"function":[85,145],"that":[86,106,141],"transferable":[88],"high":[90],"generalization":[91],"capabilities":[92],"on":[93,129],"changing":[94],"dynamics,":[95],"(3)":[97],"leverages":[98],"mode-covering":[99],"behavior":[100],"faster":[102],"convergence.":[103],"We":[104],"demonstrate":[105],"our":[107],"method":[108,123],"considerably":[110],"more":[111],"sample":[112],"efficient":[113],"generalizes":[115,146],"to":[116,147,155],"novel":[117],"environments":[118],"through":[119],"experiments.":[121],"Our":[122],"achieves":[124],"better":[125],"or":[126],"comparable":[127],"results":[128],"performance":[131],"baselines":[132],"significantly":[134],"fewer":[135],"interactions.":[136],"Furthermore,":[137],"empirically":[139],"show":[140],"recovered":[143],"different":[148],"tasks":[149],"arts":[152],"are":[153],"prone":[154],"fail.":[156]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
