{"id":"https://openalex.org/W4387614515","doi":"https://doi.org/10.1145/3627822","title":"Interpretable Imitation Learning with Symbolic Rewards","display_name":"Interpretable Imitation Learning with Symbolic Rewards","publication_year":2023,"publication_date":"2023-10-13","ids":{"openalex":"https://openalex.org/W4387614515","doi":"https://doi.org/10.1145/3627822"},"language":"en","primary_location":{"id":"doi:10.1145/3627822","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3627822","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3627822","source":{"id":"https://openalex.org/S2492086750","display_name":"ACM Transactions on Intelligent Systems and Technology","issn_l":"2157-6904","issn":["2157-6904","2157-6912"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Intelligent Systems and Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3627822","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5069965217","display_name":"Nicolas Bougie","orcid":"https://orcid.org/0000-0001-9856-0038"},"institutions":[{"id":"https://openalex.org/I73613424","display_name":"National Institute of Advanced Industrial Science and Technology","ror":"https://ror.org/01703db54","country_code":"JP","type":"government","lineage":["https://openalex.org/I73613424"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Nicolas Bougie","raw_affiliation_strings":["NEC-AIST AI Cooperative Research Laboratory, National Institute of Advanced Industrial Science and Technology, Japan"],"raw_orcid":"https://orcid.org/0000-0001-9856-0038","affiliations":[{"raw_affiliation_string":"NEC-AIST AI Cooperative Research Laboratory, National Institute of Advanced Industrial Science and Technology, Japan","institution_ids":["https://openalex.org/I73613424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103223622","display_name":"Takashi Onishi","orcid":"https://orcid.org/0009-0007-1018-7613"},"institutions":[{"id":"https://openalex.org/I73613424","display_name":"National Institute of Advanced Industrial Science and Technology","ror":"https://ror.org/01703db54","country_code":"JP","type":"government","lineage":["https://openalex.org/I73613424"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Takashi Onishi","raw_affiliation_strings":["NEC-AIST AI Cooperative Research Laboratory, National Institute of Advanced Industrial Science and Technology, Japan and NEC Corporation Data Science Research Laboratories, Japan"],"raw_orcid":"https://orcid.org/0009-0007-1018-7613","affiliations":[{"raw_affiliation_string":"NEC-AIST AI Cooperative Research Laboratory, National Institute of Advanced Industrial Science and Technology, Japan and NEC Corporation Data Science Research Laboratories, Japan","institution_ids":["https://openalex.org/I73613424"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5064113904","display_name":"Yoshimasa Tsuruoka","orcid":"https://orcid.org/0000-0002-0707-1077"},"institutions":[{"id":"https://openalex.org/I73613424","display_name":"National Institute of Advanced Industrial Science and Technology","ror":"https://ror.org/01703db54","country_code":"JP","type":"government","lineage":["https://openalex.org/I73613424"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yoshimasa Tsuruoka","raw_affiliation_strings":["NEC-AIST AI Cooperative Research Laboratory, National Institute of Advanced Industrial Science and Technology, Japan and Department of Information and Communication Engineering, The University of Tokyo, Japan"],"raw_orcid":"https://orcid.org/0000-0002-0707-1077","affiliations":[{"raw_affiliation_string":"NEC-AIST AI Cooperative Research Laboratory, National Institute of Advanced Industrial Science and Technology, Japan and Department of Information and Communication Engineering, The University of Tokyo, Japan","institution_ids":["https://openalex.org/I73613424"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.6526,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.75517685,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":"15","issue":"1","first_page":"1","last_page":"34"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9894000291824341,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8523169755935669},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.676195502281189},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6317533850669861},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6192295551300049},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5624297261238098},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.44884926080703735},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.4293982982635498}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8523169755935669},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.676195502281189},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6317533850669861},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6192295551300049},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5624297261238098},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.44884926080703735},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.4293982982635498},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3627822","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3627822","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3627822","source":{"id":"https://openalex.org/S2492086750","display_name":"ACM Transactions on Intelligent Systems and Technology","issn_l":"2157-6904","issn":["2157-6904","2157-6912"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Intelligent Systems and Technology","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3627822","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3627822","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3627822","source":{"id":"https://openalex.org/S2492086750","display_name":"ACM Transactions on Intelligent Systems and Technology","issn_l":"2157-6904","issn":["2157-6904","2157-6912"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Intelligent Systems and Technology","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4387614515.pdf","grobid_xml":"https://content.openalex.org/works/W4387614515.grobid-xml"},"referenced_works_count":89,"referenced_works":["https://openalex.org/W174941419","https://openalex.org/W1130790960","https://openalex.org/W1522301498","https://openalex.org/W1786044565","https://openalex.org/W1988526405","https://openalex.org/W2065356613","https://openalex.org/W2097571405","https://openalex.org/W2102099143","https://openalex.org/W2119717200","https://openalex.org/W2158782408","https://openalex.org/W2158969944","https://openalex.org/W2187089797","https://openalex.org/W2282821441","https://openalex.org/W2518731509","https://openalex.org/W2521274174","https://openalex.org/W2530263969","https://openalex.org/W2539402368","https://openalex.org/W2556477470","https://openalex.org/W2617547828","https://openalex.org/W2618851150","https://openalex.org/W2657631929","https://openalex.org/W2736601468","https://openalex.org/W2765204106","https://openalex.org/W2772337934","https://openalex.org/W2788455270","https://openalex.org/W2788862220","https://openalex.org/W2789160704","https://openalex.org/W2798338638","https://openalex.org/W2803974723","https://openalex.org/W2809925683","https://openalex.org/W2883535494","https://openalex.org/W2884552039","https://openalex.org/W2890882194","https://openalex.org/W2898138693","https://openalex.org/W2911918032","https://openalex.org/W2913210897","https://openalex.org/W2913350117","https://openalex.org/W2920329255","https://openalex.org/W2921802966","https://openalex.org/W2945976633","https://openalex.org/W2948609886","https://openalex.org/W2952561542","https://openalex.org/W2953073956","https://openalex.org/W2962862931","https://openalex.org/W2963099939","https://openalex.org/W2963672746","https://openalex.org/W2963847595","https://openalex.org/W2970222187","https://openalex.org/W2972122474","https://openalex.org/W2973379954","https://openalex.org/W2979200397","https://openalex.org/W2980225636","https://openalex.org/W2981702541","https://openalex.org/W2990963624","https://openalex.org/W2996001543","https://openalex.org/W2996061341","https://openalex.org/W3003997138","https://openalex.org/W3020831056","https://openalex.org/W3034514369","https://openalex.org/W3037429136","https://openalex.org/W3081056513","https://openalex.org/W3082925502","https://openalex.org/W3092213025","https://openalex.org/W3097719019","https://openalex.org/W3102824929","https://openalex.org/W3116286104","https://openalex.org/W3116637551","https://openalex.org/W3123767929","https://openalex.org/W3134840027","https://openalex.org/W3180268634","https://openalex.org/W3194668998","https://openalex.org/W3198804470","https://openalex.org/W3211658609","https://openalex.org/W4231827019","https://openalex.org/W4287278739","https://openalex.org/W4289543134","https://openalex.org/W4293507378","https://openalex.org/W4295074484","https://openalex.org/W4295720520","https://openalex.org/W4298235707","https://openalex.org/W4327652961","https://openalex.org/W4402843978","https://openalex.org/W6726913060","https://openalex.org/W6730153900","https://openalex.org/W6741002519","https://openalex.org/W6754669440","https://openalex.org/W6755291294","https://openalex.org/W6759301632","https://openalex.org/W7027579263"],"related_works":["https://openalex.org/W4306904969","https://openalex.org/W2138720691","https://openalex.org/W4362501864","https://openalex.org/W4380318855","https://openalex.org/W2031695474","https://openalex.org/W2024136090","https://openalex.org/W2586732548","https://openalex.org/W3049728571","https://openalex.org/W2964765435","https://openalex.org/W20361778"],"abstract_inverted_index":{"Sample":[0],"inefficiency":[1],"of":[2,57,74,125,143],"deep":[3],"reinforcement":[4],"learning":[5,43],"methods":[6],"is":[7,28,186],"a":[8,92,116,202],"major":[9],"obstacle":[10],"for":[11,94],"their":[12],"use":[13,166],"in":[14,31,41,84,196],"real-world":[15],"tasks":[16],"as":[17,199,201],"they":[18],"naturally":[19],"feature":[20],"sparse":[21],"rewards.":[22,161],"In":[23,88],"fact,":[24],"this":[25,55,163],"from-scratch":[26],"approach":[27],"often":[29],"impractical":[30],"environments":[32,198],"where":[33],"extreme":[34],"negative":[35],"outcomes":[36],"are":[37],"possible.":[38],"Recent":[39],"advances":[40],"imitation":[42],"have":[44],"improved":[45,134],"sample":[46],"efficiency":[47],"by":[48,115,137,178],"leveraging":[49],"expert":[50,66,101],"demonstrations.":[51],"Most":[52],"work":[53],"along":[54],"line":[56],"research":[58],"employs":[59],"neural":[60,77,170,184,225],"network-based":[61,226],"approaches":[62],"to":[63,80,118,150],"recover":[64],"an":[65,138,167],"cost":[67],"function.":[68],"However,":[69],"the":[70,85,108,123,126,129,141,144,209,215],"complexity":[71],"and":[72,82,121,135,218],"lack":[73],"transparency":[75],"make":[76],"networks":[78],"difficult":[79],"trust":[81],"deploy":[83],"real":[86],"world.":[87],"contrast,":[89],"we":[90,165],"present":[91],"method":[93,195],"extracting":[95],"interpretable":[96],"symbolic":[97,175,180],"reward":[98,110,130,145],"functions":[99],"from":[100],"data,":[102],"which":[103],"offers":[104],"several":[105],"advantages.":[106],"First,":[107],"learned":[109],"function":[111,131,146],"can":[112,132,147,212],"be":[113,133,148],"parsed":[114],"human":[117],"understand,":[119],"verify":[120],"predict":[122],"behavior":[124],"agent.":[127],"Second,":[128],"modified":[136],"expert.":[139],"Finally,":[140],"structure":[142],"leveraged":[149],"extract":[151],"explanations":[152],"that":[153,172,208],"encode":[154],"richer":[155],"domain":[156],"knowledge":[157],"than":[158,224],"standard":[159],"scalar":[160],"To":[162],"end,":[164],"autoregressive":[168],"recurrent":[169,183],"network":[171,185],"generates":[173],"hierarchical":[174],"rewards":[176,211],"represented":[177],"simple":[179],"trees.":[181],"The":[182],"trained":[187],"via":[188],"risk-seeking":[189],"policy":[190],"gradients.":[191],"We":[192,206],"test":[193],"our":[194],"MuJoCo":[197],"well":[200],"chemical":[203],"plant":[204],"simulator.":[205],"show":[207],"discovered":[210],"significantly":[213],"accelerate":[214],"training":[216],"process":[217],"achieve":[219],"similar":[220],"or":[221],"better":[222],"performance":[223],"algorithms.":[227]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
