{"id":"https://openalex.org/W4405489252","doi":"https://doi.org/10.1109/embc53108.2024.10782800","title":"Dynamic Inverse Reinforcement Learning for Feedback-driven Reward Estimation in Brain Machine Interface Tasks","display_name":"Dynamic Inverse Reinforcement Learning for Feedback-driven Reward Estimation in Brain Machine Interface Tasks","publication_year":2024,"publication_date":"2024-07-15","ids":{"openalex":"https://openalex.org/W4405489252","doi":"https://doi.org/10.1109/embc53108.2024.10782800","pmid":"https://pubmed.ncbi.nlm.nih.gov/40039912"},"language":"en","primary_location":{"id":"doi:10.1109/embc53108.2024.10782800","is_oa":false,"landing_page_url":"https://doi.org/10.1109/embc53108.2024.10782800","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 46th Annual International Conference of the IEEE Engineering in Medicine and Biology Society (EMBC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006394794","display_name":"Jieyuan Tan","orcid":"https://orcid.org/0000-0003-2782-298X"},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":true,"raw_author_name":"Jieyuan Tan","raw_affiliation_strings":["Hong Kong Univesrsity of Science and Technology,Department of Electronics and Computer Engineering,Hong Kong SAR,China"],"affiliations":[{"raw_affiliation_string":"Hong Kong Univesrsity of Science and Technology,Department of Electronics and Computer Engineering,Hong Kong SAR,China","institution_ids":["https://openalex.org/I200769079"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100383442","display_name":"Yiwen Wang","orcid":"https://orcid.org/0000-0001-8966-5938"},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Yiwen Wang","raw_affiliation_strings":["Hong Kong Univesrsity of Science and Technology,Department of Electronics and Computer Engineering Department of Chemical and Biological Engineering,Hong Kong SAR,China"],"affiliations":[{"raw_affiliation_string":"Hong Kong Univesrsity of Science and Technology,Department of Electronics and Computer Engineering Department of Chemical and Biological Engineering,Hong Kong SAR,China","institution_ids":["https://openalex.org/I200769079"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5006394794"],"corresponding_institution_ids":["https://openalex.org/I200769079"],"apc_list":null,"apc_paid":null,"fwci":0.329,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.61749343,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"2024","issue":null,"first_page":"1","last_page":"4"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10429","display_name":"EEG and Brain-Computer Interfaces","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10429","display_name":"EEG and Brain-Computer Interfaces","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10042","display_name":"Neural and Behavioral Psychology Studies","score":0.9700999855995178,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10784","display_name":"Muscle activation and electromyography studies","score":0.9672999978065491,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7960138320922852},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7501504421234131},{"id":"https://openalex.org/keywords/brain\u2013computer-interface","display_name":"Brain\u2013computer interface","score":0.6355741620063782},{"id":"https://openalex.org/keywords/interface","display_name":"Interface (matter)","score":0.5480419397354126},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.49047204852104187},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4684258699417114},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4625534117221832},{"id":"https://openalex.org/keywords/estimation","display_name":"Estimation","score":0.4164179265499115},{"id":"https://openalex.org/keywords/inverse-dynamics","display_name":"Inverse dynamics","score":0.4135400652885437},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1284492015838623},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.10816621780395508},{"id":"https://openalex.org/keywords/neuroscience","display_name":"Neuroscience","score":0.09172111749649048},{"id":"https://openalex.org/keywords/electroencephalography","display_name":"Electroencephalography","score":0.08299332857131958}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7960138320922852},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7501504421234131},{"id":"https://openalex.org/C173201364","wikidata":"https://www.wikidata.org/wiki/Q897410","display_name":"Brain\u2013computer interface","level":3,"score":0.6355741620063782},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.5480419397354126},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.49047204852104187},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4684258699417114},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4625534117221832},{"id":"https://openalex.org/C96250715","wikidata":"https://www.wikidata.org/wiki/Q965330","display_name":"Estimation","level":2,"score":0.4164179265499115},{"id":"https://openalex.org/C187523126","wikidata":"https://www.wikidata.org/wiki/Q17098330","display_name":"Inverse dynamics","level":3,"score":0.4135400652885437},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1284492015838623},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.10816621780395508},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.09172111749649048},{"id":"https://openalex.org/C522805319","wikidata":"https://www.wikidata.org/wiki/Q179965","display_name":"Electroencephalography","level":2,"score":0.08299332857131958},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.0},{"id":"https://openalex.org/C74650414","wikidata":"https://www.wikidata.org/wiki/Q11397","display_name":"Classical mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C129307140","wikidata":"https://www.wikidata.org/wiki/Q6795880","display_name":"Maximum bubble pressure method","level":3,"score":0.0},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C39920418","wikidata":"https://www.wikidata.org/wiki/Q11476","display_name":"Kinematics","level":2,"score":0.0},{"id":"https://openalex.org/C157915830","wikidata":"https://www.wikidata.org/wiki/Q2928001","display_name":"Bubble","level":2,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D000069550","descriptor_name":"Machine Learning","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D000465","descriptor_name":"Algorithms","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D005246","descriptor_name":"Feedback","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D005246","descriptor_name":"Feedback","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D005246","descriptor_name":"Feedback","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012054","descriptor_name":"Reinforcement, Psychology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012054","descriptor_name":"Reinforcement, Psychology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012054","descriptor_name":"Reinforcement, Psychology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012201","descriptor_name":"Reward","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012201","descriptor_name":"Reward","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012201","descriptor_name":"Reward","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D062207","descriptor_name":"Brain-Computer Interfaces","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D062207","descriptor_name":"Brain-Computer Interfaces","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D062207","descriptor_name":"Brain-Computer Interfaces","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true}],"locations_count":2,"locations":[{"id":"doi:10.1109/embc53108.2024.10782800","is_oa":false,"landing_page_url":"https://doi.org/10.1109/embc53108.2024.10782800","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 46th Annual International Conference of the IEEE Engineering in Medicine and Biology Society (EMBC)","raw_type":"proceedings-article"},{"id":"pmid:40039912","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/40039912","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Annual International Conference of the IEEE Engineering in Medicine and Biology Society. IEEE Engineering in Medicine and Biology Society. Annual International Conference","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W1507466738","https://openalex.org/W2063702730","https://openalex.org/W4225688536","https://openalex.org/W4226441468","https://openalex.org/W4293194371","https://openalex.org/W4295430387","https://openalex.org/W4312540261","https://openalex.org/W4387445329","https://openalex.org/W4389542210","https://openalex.org/W6804214189"],"related_works":["https://openalex.org/W3202969339","https://openalex.org/W4237513258","https://openalex.org/W2044053727","https://openalex.org/W1994410349","https://openalex.org/W3177028067","https://openalex.org/W1913385466","https://openalex.org/W2914170859","https://openalex.org/W2889342546","https://openalex.org/W2015048155","https://openalex.org/W1969223073"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,31],"(RL)-based":[2],"brain":[3],"machine":[4],"interfaces":[5],"(BMIs)":[6],"provide":[7],"a":[8,83,97,136,152,186,193,197],"promising":[9],"solution":[10],"for":[11,104,116,161,238,244],"paralyzed":[12],"people.":[13],"Enhancing":[14],"the":[15,23,42,59,65,75,113,142,158,169,174,178,210,218,224,234,246],"decoding":[16],"performance":[17],"of":[18,25,58,61,71,126,145,177,248],"RL-based":[19,249],"BMIs":[20],"relies":[21],"on":[22,185],"design":[24,247],"effective":[26],"reward":[27,50,86,99,106,114,144,159,200,206,240],"signals.":[28],"Inverse":[29],"reinforcement":[30],"(IRL)":[32],"offers":[33],"an":[34],"approach":[35],"to":[36,48,78,81,111,128,140,155,172,209],"infer":[37,157],"subjects'":[38,62,72],"own":[39],"evaluation":[40,73],"from":[41],"observed":[43],"behavior.":[44],"However,":[45],"applying":[46],"IRL":[47,76,89,102,138,221,236],"extract":[49],"information":[51],"in":[52,92],"complex":[53],"BMI":[54,93,148,188],"tasks":[55],"requires":[56,74],"consideration":[57],"dynamics":[60],"goal":[63],"during":[64,147],"control":[66],"process.":[67],"This":[68],"dynamic":[69,105,137,220,235],"nature":[70],"method":[77,139,184,203,222,237],"be":[79],"able":[80],"estimate":[82,141],"time":[84,198],"varying":[85,199],"function.":[87,100,201],"Previous":[88],"methods":[90,110],"applied":[91],"systems":[94],"only":[95],"estimated":[96],"static":[98],"Existing":[101],"algorithms":[103],"estimation":[107,207,241],"employ":[108],"optimization":[109],"approximate":[112],"map":[115,225],"each":[117,120,162],"state":[118],"at":[119],"time,":[121],"which":[122,191],"demands":[123],"substantial":[124],"amounts":[125],"data":[127],"achieve":[129],"convergence.":[130],"In":[131],"this":[132],"paper,":[133],"we":[134],"propose":[135],"feedback-driven":[143,239],"subjects":[146],"tasks.":[149],"We":[150,180],"utilize":[151],"state-observation":[153],"model":[154,173],"continuously":[156],"value":[160],"state,":[163],"with":[164,196],"sensory":[165],"feedback":[166],"serving":[167],"as":[168],"external":[170],"input":[171],"transition":[175],"process":[176],"reward.":[179],"evaluate":[181],"our":[182],"proposed":[183],"simulated":[187],"fetch":[189],"task,":[190],"is":[192],"multistep":[194],"task":[195],"Our":[202],"demonstrates":[204],"improved":[205],"close":[208],"ground":[211],"truth":[212],"value,":[213],"and":[214],"it":[215],"significantly":[216],"outperforms":[217],"existing":[219],"when":[223],"size":[226],"exceeds":[227],"25(p<0.01).":[228],"These":[229],"preliminary":[230],"results":[231],"suggests":[232],"that":[233],"holds":[242],"potential":[243],"improving":[245],"BMIs.":[250]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-19T08:26:33.389920","created_date":"2025-10-10T00:00:00"}
