{"id":"https://openalex.org/W4394002448","doi":"https://doi.org/10.1109/tac.2024.3385680","title":"Reinforcement Learning for Partially Observable Linear Gaussian Systems Using Batch Dynamics of Noisy Observations","display_name":"Reinforcement Learning for Partially Observable Linear Gaussian Systems Using Batch Dynamics of Noisy Observations","publication_year":2024,"publication_date":"2024-04-05","ids":{"openalex":"https://openalex.org/W4394002448","doi":"https://doi.org/10.1109/tac.2024.3385680"},"language":"en","primary_location":{"id":"doi:10.1109/tac.2024.3385680","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tac.2024.3385680","pdf_url":null,"source":{"id":"https://openalex.org/S184954342","display_name":"IEEE Transactions on Automatic Control","issn_l":"0018-9286","issn":["0018-9286","1558-2523","2334-3303"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automatic Control","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036304677","display_name":"Farnaz Adib Yaghmaie","orcid":"https://orcid.org/0000-0002-6665-5881"},"institutions":[{"id":"https://openalex.org/I102134673","display_name":"Link\u00f6ping University","ror":"https://ror.org/05ynxx418","country_code":"SE","type":"education","lineage":["https://openalex.org/I102134673"]}],"countries":["SE"],"is_corresponding":true,"raw_author_name":"Farnaz Adib Yaghmaie","raw_affiliation_strings":["Faculty of Electrical Engineering, Link&#x00F6;ping University, Link&#x00F6;ping, Sweden"],"affiliations":[{"raw_affiliation_string":"Faculty of Electrical Engineering, Link&#x00F6;ping University, Link&#x00F6;ping, Sweden","institution_ids":["https://openalex.org/I102134673"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063118155","display_name":"Hamidreza Modares","orcid":"https://orcid.org/0000-0003-0800-5140"},"institutions":[{"id":"https://openalex.org/I87216513","display_name":"Michigan State University","ror":"https://ror.org/05hs6h993","country_code":"US","type":"education","lineage":["https://openalex.org/I87216513"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hamidreza Modares","raw_affiliation_strings":["College of Engineering, Michigan State University, East Lansing, MI, USA"],"affiliations":[{"raw_affiliation_string":"College of Engineering, Michigan State University, East Lansing, MI, USA","institution_ids":["https://openalex.org/I87216513"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058002446","display_name":"Fredrik Gustafsson","orcid":"https://orcid.org/0000-0003-3270-171X"},"institutions":[{"id":"https://openalex.org/I102134673","display_name":"Link\u00f6ping University","ror":"https://ror.org/05ynxx418","country_code":"SE","type":"education","lineage":["https://openalex.org/I102134673"]}],"countries":["SE"],"is_corresponding":false,"raw_author_name":"Fredrik Gustafsson","raw_affiliation_strings":["Faculty of Electrical Engineering, Link&#x00F6;ping University, Link&#x00F6;ping, Sweden"],"affiliations":[{"raw_affiliation_string":"Faculty of Electrical Engineering, Link&#x00F6;ping University, Link&#x00F6;ping, Sweden","institution_ids":["https://openalex.org/I102134673"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5036304677"],"corresponding_institution_ids":["https://openalex.org/I102134673"],"apc_list":null,"apc_paid":null,"fwci":1.3138,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.79029071,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":97},"biblio":{"volume":"69","issue":"9","first_page":"6397","last_page":"6404"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.9944000244140625,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.9944000244140625,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10711","display_name":"Target Tracking and Data Fusion in Sensor Networks","score":0.9901999831199646,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12814","display_name":"Gaussian Processes and Bayesian Inference","score":0.9581000208854675,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/observable","display_name":"Observable","score":0.8276199698448181},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6788575649261475},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.5608751773834229},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5542765855789185},{"id":"https://openalex.org/keywords/control-theory","display_name":"Control theory (sociology)","score":0.5442999601364136},{"id":"https://openalex.org/keywords/gaussian-process","display_name":"Gaussian process","score":0.5365616679191589},{"id":"https://openalex.org/keywords/dynamics","display_name":"Dynamics (music)","score":0.512545108795166},{"id":"https://openalex.org/keywords/system-dynamics","display_name":"System dynamics","score":0.4732390344142914},{"id":"https://openalex.org/keywords/nonlinear-system","display_name":"Nonlinear system","score":0.44609078764915466},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.39009642601013184},{"id":"https://openalex.org/keywords/statistical-physics","display_name":"Statistical physics","score":0.3424806594848633},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.16791892051696777},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.11030620336532593}],"concepts":[{"id":"https://openalex.org/C32848918","wikidata":"https://www.wikidata.org/wiki/Q845789","display_name":"Observable","level":2,"score":0.8276199698448181},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6788575649261475},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.5608751773834229},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5542765855789185},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.5442999601364136},{"id":"https://openalex.org/C61326573","wikidata":"https://www.wikidata.org/wiki/Q1496376","display_name":"Gaussian process","level":3,"score":0.5365616679191589},{"id":"https://openalex.org/C145912823","wikidata":"https://www.wikidata.org/wiki/Q113558","display_name":"Dynamics (music)","level":2,"score":0.512545108795166},{"id":"https://openalex.org/C77405623","wikidata":"https://www.wikidata.org/wiki/Q598451","display_name":"System dynamics","level":2,"score":0.4732390344142914},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.44609078764915466},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39009642601013184},{"id":"https://openalex.org/C121864883","wikidata":"https://www.wikidata.org/wiki/Q677916","display_name":"Statistical physics","level":1,"score":0.3424806594848633},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.16791892051696777},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.11030620336532593},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tac.2024.3385680","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tac.2024.3385680","pdf_url":null,"source":{"id":"https://openalex.org/S184954342","display_name":"IEEE Transactions on Automatic Control","issn_l":"0018-9286","issn":["0018-9286","1558-2523","2334-3303"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Automatic Control","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3129294593","display_name":null,"funder_award_id":"ECCS-2227311","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320322327","display_name":"Knut och Alice Wallenbergs Stiftelse","ror":"https://ror.org/004hzzk67"},{"id":"https://openalex.org/F4320322581","display_name":"Vetenskapsr\u00e5det","ror":"https://ror.org/03zttf063"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W178056938","https://openalex.org/W2011866373","https://openalex.org/W2121863487","https://openalex.org/W2151966330","https://openalex.org/W2395575420","https://openalex.org/W2607713949","https://openalex.org/W2822752092","https://openalex.org/W3010834964","https://openalex.org/W3135791668","https://openalex.org/W3203485495","https://openalex.org/W4210704481","https://openalex.org/W4250508157","https://openalex.org/W4252380473","https://openalex.org/W6677939520","https://openalex.org/W6712181171","https://openalex.org/W6737069938","https://openalex.org/W6746722099","https://openalex.org/W6753610117","https://openalex.org/W6754297394","https://openalex.org/W6775030691","https://openalex.org/W6779354364","https://openalex.org/W6791353669"],"related_works":["https://openalex.org/W1994680671","https://openalex.org/W2000283393","https://openalex.org/W2002320543","https://openalex.org/W2150232912","https://openalex.org/W2054940838","https://openalex.org/W2061947244","https://openalex.org/W4321855183","https://openalex.org/W3106170641","https://openalex.org/W1964286703","https://openalex.org/W2169866437"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,22],"algorithms":[2,23],"are":[3,24],"commonly":[4],"used":[5],"to":[6,26,38,80],"control":[7],"dynamical":[8,16],"systems":[9,66],"with":[10,67],"measurable":[11],"state":[12,49,94],"variables.":[13],"If":[14],"the":[15,29,48,58,72,82,101],"system":[17],"is":[18,37],"partially":[19],"observable,":[20],"reinforcement":[21],"modified":[25],"compensate":[27],"for":[28],"effect":[30,59],"of":[31,43,47,60,74,84,89,93],"partial":[32],"observability.":[33],"One":[34],"common":[35],"approach":[36,62,99],"feed":[39],"a":[40,86],"finite":[41,87],"history":[42,88],"input-output":[44,90],"data":[45,91],"instead":[46,92],"variable.":[50],"In":[51],"this":[52,61,98],"paper,":[53],"we":[54],"study":[55],"and":[56,95],"quantify":[57],"in":[63],"linear":[64],"Gaussian":[65],"quadratic":[68],"costs.":[69],"We":[70],"coin":[71],"concept":[73],"<italic":[75],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[76],"xmlns:xlink=\"http://www.w3.org/1999/xlink\"><inline-formula><tex-math":[77],"notation=\"LaTeX\">$L$</tex-math></inline-formula>-Extra-Sampled":[78],"(Les)-dynamics</i>":[79],"formalize":[81],"idea":[83],"using":[85],"show":[96],"that":[97],"increases":[100],"average":[102],"cost.":[103]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
