{"id":"https://openalex.org/W4410557498","doi":"https://doi.org/10.1145/3736417","title":"Automate Legibility through Inverse Reinforcement Learning","display_name":"Automate Legibility through Inverse Reinforcement Learning","publication_year":2025,"publication_date":"2025-05-21","ids":{"openalex":"https://openalex.org/W4410557498","doi":"https://doi.org/10.1145/3736417"},"language":"en","primary_location":{"id":"doi:10.1145/3736417","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3736417","pdf_url":null,"source":{"id":"https://openalex.org/S16632050","display_name":"ACM Transactions on Autonomous and Adaptive Systems","issn_l":"1556-4665","issn":["1556-4665","1556-4703"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Autonomous and Adaptive Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5035258046","display_name":"Buxin Zeng","orcid":"https://orcid.org/0000-0003-0546-9628"},"institutions":[{"id":"https://openalex.org/I32394136","display_name":"Northumbria University","ror":"https://ror.org/049e6bc10","country_code":"GB","type":"education","lineage":["https://openalex.org/I32394136"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Buxin Zeng","raw_affiliation_strings":["Department of Computer and Information Sciences, Northumbria University, Newcastle upon Tyne, United Kingdom of Great Britain and Northern Ireland","Department of Computer and Information Sciences, Northumbria University, UK"],"raw_orcid":"https://orcid.org/0000-0003-0546-9628","affiliations":[{"raw_affiliation_string":"Department of Computer and Information Sciences, Northumbria University, Newcastle upon Tyne, United Kingdom of Great Britain and Northern Ireland","institution_ids":["https://openalex.org/I32394136"]},{"raw_affiliation_string":"Department of Computer and Information Sciences, Northumbria University, UK","institution_ids":["https://openalex.org/I32394136"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085263461","display_name":"Yinghui Pan","orcid":"https://orcid.org/0000-0001-5715-2855"},"institutions":[{"id":"https://openalex.org/I180726961","display_name":"Shenzhen University","ror":"https://ror.org/01vy4gh70","country_code":"CN","type":"education","lineage":["https://openalex.org/I180726961"]},{"id":"https://openalex.org/I4210152380","display_name":"Shenzhen Technology University","ror":"https://ror.org/04qzpec27","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210152380"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yinghui Pan","raw_affiliation_strings":["School of Artificial Intelligence &amp; National Engineering Laboratory for Big Data System Computing Technology, Shenzhen University, Shenzhen, China","School of Artificial Intelligence &amp; National Engineering Laboratory for Big Data System Computing Technology, Shenzhen University, China"],"raw_orcid":"https://orcid.org/0000-0001-5715-2855","affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence &amp; National Engineering Laboratory for Big Data System Computing Technology, Shenzhen University, Shenzhen, China","institution_ids":["https://openalex.org/I4210152380"]},{"raw_affiliation_string":"School of Artificial Intelligence &amp; National Engineering Laboratory for Big Data System Computing Technology, Shenzhen University, China","institution_ids":["https://openalex.org/I180726961"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115590761","display_name":"Jing Tang","orcid":"https://orcid.org/0000-0002-0821-4623"},"institutions":[{"id":"https://openalex.org/I32394136","display_name":"Northumbria University","ror":"https://ror.org/049e6bc10","country_code":"GB","type":"education","lineage":["https://openalex.org/I32394136"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jing Tang","raw_affiliation_strings":["Newcastle Business School, Northumbria University, Newcastle upon Tyne, United Kingdom of Great Britain and Northern Ireland","Newcastle Business School, Northumbria University, UK"],"raw_orcid":"https://orcid.org/0000-0002-0821-4623","affiliations":[{"raw_affiliation_string":"Newcastle Business School, Northumbria University, Newcastle upon Tyne, United Kingdom of Great Britain and Northern Ireland","institution_ids":["https://openalex.org/I32394136"]},{"raw_affiliation_string":"Newcastle Business School, Northumbria University, UK","institution_ids":["https://openalex.org/I32394136"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100653058","display_name":"Yifeng Zeng","orcid":"https://orcid.org/0000-0002-5246-403X"},"institutions":[{"id":"https://openalex.org/I32394136","display_name":"Northumbria University","ror":"https://ror.org/049e6bc10","country_code":"GB","type":"education","lineage":["https://openalex.org/I32394136"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Yifeng Zeng","raw_affiliation_strings":["Computer and Information Sciences, Northumbria University, Newcastle upon Tyne, United Kingdom of Great Britain and Northern Ireland","Department of Computer and Information Sciences, Northumbria University, UK"],"raw_orcid":"https://orcid.org/0000-0002-5246-403X","affiliations":[{"raw_affiliation_string":"Computer and Information Sciences, Northumbria University, Newcastle upon Tyne, United Kingdom of Great Britain and Northern Ireland","institution_ids":["https://openalex.org/I32394136"]},{"raw_affiliation_string":"Department of Computer and Information Sciences, Northumbria University, UK","institution_ids":["https://openalex.org/I32394136"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.7588,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.86534297,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":"21","issue":"1","first_page":"1","last_page":"21"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9830999970436096,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/legibility","display_name":"Legibility","score":0.8929728269577026},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8612690567970276},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7300256490707397},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5051386952400208},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.45088398456573486},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.34999385476112366}],"concepts":[{"id":"https://openalex.org/C2779332521","wikidata":"https://www.wikidata.org/wiki/Q1820694","display_name":"Legibility","level":2,"score":0.8929728269577026},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8612690567970276},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7300256490707397},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5051386952400208},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.45088398456573486},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.34999385476112366},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3736417","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3736417","pdf_url":null,"source":{"id":"https://openalex.org/S16632050","display_name":"ACM Transactions on Autonomous and Adaptive Systems","issn_l":"1556-4665","issn":["1556-4665","1556-4703"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Autonomous and Adaptive Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4836874524","display_name":null,"funder_award_id":"62276168 and 62176225","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W1784395471","https://openalex.org/W1984794015","https://openalex.org/W1990991664","https://openalex.org/W1999874108","https://openalex.org/W2038622017","https://openalex.org/W2060846151","https://openalex.org/W2089566520","https://openalex.org/W2141481921","https://openalex.org/W2594775242","https://openalex.org/W2791581064","https://openalex.org/W2800521404","https://openalex.org/W2913756371","https://openalex.org/W2914933231","https://openalex.org/W3006063411","https://openalex.org/W3032916997","https://openalex.org/W3094060111","https://openalex.org/W3109424835","https://openalex.org/W3138984732","https://openalex.org/W3140646774","https://openalex.org/W3141956903","https://openalex.org/W3162904728","https://openalex.org/W3165815830","https://openalex.org/W3193577127","https://openalex.org/W4205326910","https://openalex.org/W4205882492","https://openalex.org/W4214717370","https://openalex.org/W4248056173","https://openalex.org/W4251170678","https://openalex.org/W4285185892","https://openalex.org/W4285419493","https://openalex.org/W4387188670","https://openalex.org/W4392953700","https://openalex.org/W4402301005"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W4387369504","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W3107602296","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4283697347"],"abstract_inverted_index":{"When":[0],"intelligent":[1],"agents":[2,69],"act":[3],"in":[4,36,101,107,120,131,165,185,216,225,233],"a":[5,27,103,132,144,150,162,186,205,217],"stochastic":[6],"environment,":[7],"the":[8,23,43,51,75,82,91,108,125,172,175,194,213,226],"principle":[9],"of":[10,22,93,174],"maximizing":[11],"expected":[12],"rewards":[13,25],"is":[14,63,182],"used":[15],"to":[16,42,73,89,115,141,148,160,200,211],"optimize":[17],"their":[18],"policies.":[19,84],"The":[20,97],"rationality":[21],"maximum":[24],"becomes":[26,87,129],"single":[28],"objective":[29],"when":[30,78,180,192],"agents\u2019":[31,44,76,94,166],"decision":[32,52,95,167,187],"problems":[33],"are":[34,55,80,199],"solved":[35],"most":[37],"cases.":[38],"This":[39],"sometimes":[40],"leads":[41],"behaviors":[45],"(the":[46],"optimal":[47,83],"policies":[48],"for":[49,65],"solving":[50],"problems)":[53],"that":[54],"not":[56],"legible":[57],".":[58],"In":[59,136],"other":[60,68],"words,":[61],"it":[62,86],"difficult":[64],"users":[66],"(or":[67],"and":[70,118,124,196,229],"even":[71],"humans)":[72],"understand":[74],"intentions":[77],"they":[79],"executing":[81],"Hence,":[85],"pertinent":[88],"consider":[90],"legibility":[92,105,122,145,163,181,197],"problems.":[96,109,168],"key":[98],"challenge":[99],"lies":[100],"formulating":[102],"proper":[104],"function":[106,146,164],"Using":[110],"domain":[111],"experts\u2019":[112],"inputs":[113],"leans":[114],"be":[116,201],"subjective":[117],"inconsistent":[119],"specifying":[121],"values,":[123],"manual":[126],"approach":[127],"quickly":[128],"infeasible":[130],"complex":[133],"problem":[134,223],"domain.":[135],"this":[137],"article,":[138],"we":[139],"aim":[140],"learn":[142],"such":[143],"parallel":[147],"developing":[149],"(conventional)":[151],"reward":[152,195],"function.":[153],"We":[154,169,203,221],"adopt":[155],"inverse":[156,176,207],"reinforcement":[157,177,208],"learning":[158,178,209],"techniques":[159],"automate":[161,212],"first":[170],"demonstrate":[171],"effectiveness":[173],"technique":[179],"solely":[183],"considered":[184],"problem.":[188],"Things":[189],"become":[190],"complicated":[191],"both":[193],"functions":[198,215],"found.":[202],"develop":[204],"multi-objective":[206],"method":[210],"two":[214],"good":[218],"balance":[219],"simultaneously.":[220],"vary":[222],"domains":[224],"performance":[227],"study":[228],"provide":[230],"empirical":[231],"results":[232],"support.":[234]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
