{"id":"https://openalex.org/W2981787210","doi":"https://doi.org/10.1007/s42484-020-00023-9","title":"On the convergence of projective-simulation\u2013based reinforcement learning in Markov decision processes","display_name":"On the convergence of projective-simulation\u2013based reinforcement learning in Markov decision processes","publication_year":2020,"publication_date":"2020-11-05","ids":{"openalex":"https://openalex.org/W2981787210","doi":"https://doi.org/10.1007/s42484-020-00023-9","mag":"2981787210","pmid":"https://pubmed.ncbi.nlm.nih.gov/33184611"},"language":"en","primary_location":{"id":"doi:10.1007/s42484-020-00023-9","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s42484-020-00023-9","pdf_url":"https://link.springer.com/content/pdf/10.1007/s42484-020-00023-9.pdf","source":{"id":"https://openalex.org/S4210217596","display_name":"Quantum Machine Intelligence","issn_l":"2524-4906","issn":["2524-4906","2524-4914"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Quantum Machine Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s42484-020-00023-9.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"W. L. Boyajian","orcid":null},"institutions":[{"id":"https://openalex.org/I190249584","display_name":"Universit\u00e4t Innsbruck","ror":"https://ror.org/054pv6659","country_code":"AT","type":"education","lineage":["https://openalex.org/I190249584"]}],"countries":["AT"],"is_corresponding":true,"raw_author_name":"W. L. Boyajian","raw_affiliation_strings":["Institute for Theoretical Physics, University of Innsbruck, 6020 Innsbruck, Austria"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute for Theoretical Physics, University of Innsbruck, 6020 Innsbruck, Austria","institution_ids":["https://openalex.org/I190249584"]}]},{"author_position":"middle","author":{"id":null,"display_name":"J. Clausen","orcid":null},"institutions":[{"id":"https://openalex.org/I190249584","display_name":"Universit\u00e4t Innsbruck","ror":"https://ror.org/054pv6659","country_code":"AT","type":"education","lineage":["https://openalex.org/I190249584"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"J. Clausen","raw_affiliation_strings":["Institute for Theoretical Physics, University of Innsbruck, 6020 Innsbruck, Austria"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute for Theoretical Physics, University of Innsbruck, 6020 Innsbruck, Austria","institution_ids":["https://openalex.org/I190249584"]}]},{"author_position":"middle","author":{"id":null,"display_name":"L. M. Trenkwalder","orcid":"https://orcid.org/0000-0002-5690-707X"},"institutions":[{"id":"https://openalex.org/I190249584","display_name":"Universit\u00e4t Innsbruck","ror":"https://ror.org/054pv6659","country_code":"AT","type":"education","lineage":["https://openalex.org/I190249584"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"L. M. Trenkwalder","raw_affiliation_strings":["Institute for Theoretical Physics, University of Innsbruck, 6020 Innsbruck, Austria"],"raw_orcid":"https://orcid.org/0000-0002-5690-707X","affiliations":[{"raw_affiliation_string":"Institute for Theoretical Physics, University of Innsbruck, 6020 Innsbruck, Austria","institution_ids":["https://openalex.org/I190249584"]}]},{"author_position":"middle","author":{"id":null,"display_name":"V. Dunjko","orcid":null},"institutions":[{"id":"https://openalex.org/I121797337","display_name":"Leiden University","ror":"https://ror.org/027bh9e22","country_code":"NL","type":"education","lineage":["https://openalex.org/I121797337"]},{"id":"https://openalex.org/I190249584","display_name":"Universit\u00e4t Innsbruck","ror":"https://ror.org/054pv6659","country_code":"AT","type":"education","lineage":["https://openalex.org/I190249584"]}],"countries":["AT","NL"],"is_corresponding":false,"raw_author_name":"V. Dunjko","raw_affiliation_strings":["Institute for Theoretical Physics, University of Innsbruck, 6020 Innsbruck, Austria","LIACS, Leiden University, Niels Bohrweg 1, 2333 CA Leiden, The Netherlands"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute for Theoretical Physics, University of Innsbruck, 6020 Innsbruck, Austria","institution_ids":["https://openalex.org/I190249584"]},{"raw_affiliation_string":"LIACS, Leiden University, Niels Bohrweg 1, 2333 CA Leiden, The Netherlands","institution_ids":["https://openalex.org/I121797337"]}]},{"author_position":"last","author":{"id":null,"display_name":"H. J. Briegel","orcid":null},"institutions":[{"id":"https://openalex.org/I189712700","display_name":"University of Konstanz","ror":"https://ror.org/0546hnb39","country_code":"DE","type":"education","lineage":["https://openalex.org/I189712700"]},{"id":"https://openalex.org/I190249584","display_name":"Universit\u00e4t Innsbruck","ror":"https://ror.org/054pv6659","country_code":"AT","type":"education","lineage":["https://openalex.org/I190249584"]}],"countries":["AT","DE"],"is_corresponding":false,"raw_author_name":"H. J. Briegel","raw_affiliation_strings":["Department of Philosophy, University of Konstanz, 78457 Konstanz, Germany","Institute for Theoretical Physics, University of Innsbruck, 6020 Innsbruck, Austria"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Philosophy, University of Konstanz, 78457 Konstanz, Germany","institution_ids":["https://openalex.org/I189712700"]},{"raw_affiliation_string":"Institute for Theoretical Physics, University of Innsbruck, 6020 Innsbruck, Austria","institution_ids":["https://openalex.org/I190249584"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I190249584"],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":{"value":2390,"currency":"EUR","value_usd":2990},"fwci":0.68,"has_fulltext":true,"cited_by_count":15,"citation_normalized_percentile":{"value":0.75633716,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"2","issue":"2","first_page":"13","last_page":"13"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10682","display_name":"Quantum Computing Algorithms and Architecture","score":0.9132000207901001,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10682","display_name":"Quantum Computing Algorithms and Architecture","score":0.9132000207901001,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10622","display_name":"Quantum Mechanics and Applications","score":0.02410000003874302,"subfield":{"id":"https://openalex.org/subfields/3107","display_name":"Atomic and Molecular Physics, and Optics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10020","display_name":"Quantum Information and Cryptography","score":0.014999999664723873,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8730999827384949},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.7720999717712402},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.5351999998092651},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5087000131607056},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.47940000891685486},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.4652999937534332},{"id":"https://openalex.org/keywords/projective-test","display_name":"Projective test","score":0.4496000111103058},{"id":"https://openalex.org/keywords/q-learning","display_name":"Q-learning","score":0.4339999854564667}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8730999827384949},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.7720999717712402},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6474000215530396},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.5351999998092651},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5253999829292297},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5087000131607056},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.47940000891685486},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.4652999937534332},{"id":"https://openalex.org/C177846678","wikidata":"https://www.wikidata.org/wiki/Q1501864","display_name":"Projective test","level":2,"score":0.4496000111103058},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4456000030040741},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.4339999854564667},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.4041000008583069},{"id":"https://openalex.org/C17098449","wikidata":"https://www.wikidata.org/wiki/Q176814","display_name":"Partially observable Markov decision process","level":4,"score":0.4009000062942505},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.375900000333786},{"id":"https://openalex.org/C8038995","wikidata":"https://www.wikidata.org/wiki/Q1152135","display_name":"Unsupervised learning","level":2,"score":0.36419999599456787},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.32580000162124634},{"id":"https://openalex.org/C32254414","wikidata":"https://www.wikidata.org/wiki/Q4724364","display_name":"Algorithmic learning theory","level":3,"score":0.31859999895095825},{"id":"https://openalex.org/C28901747","wikidata":"https://www.wikidata.org/wiki/Q177571","display_name":"Decision theory","level":2,"score":0.30820000171661377},{"id":"https://openalex.org/C24138899","wikidata":"https://www.wikidata.org/wiki/Q17141258","display_name":"Instance-based learning","level":3,"score":0.29280000925064087},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.28850001096725464},{"id":"https://openalex.org/C176248197","wikidata":"https://www.wikidata.org/wiki/Q458526","display_name":"Probably approximately correct learning","level":4,"score":0.2849000096321106},{"id":"https://openalex.org/C115988155","wikidata":"https://www.wikidata.org/wiki/Q3262192","display_name":"Decision problem","level":2,"score":0.27970001101493835},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.27390000224113464},{"id":"https://openalex.org/C84114770","wikidata":"https://www.wikidata.org/wiki/Q46344","display_name":"Quantum","level":2,"score":0.26440000534057617},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.2526000142097473}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1007/s42484-020-00023-9","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s42484-020-00023-9","pdf_url":"https://link.springer.com/content/pdf/10.1007/s42484-020-00023-9.pdf","source":{"id":"https://openalex.org/S4210217596","display_name":"Quantum Machine Intelligence","issn_l":"2524-4906","issn":["2524-4906","2524-4914"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Quantum Machine Intelligence","raw_type":"journal-article"},{"id":"pmid:33184611","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/33184611","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Quantum machine intelligence","raw_type":null},{"id":"pmh:oai:arXiv.org:1910.11914","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1910.11914","pdf_url":"https://arxiv.org/pdf/1910.11914","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:diglib.uibk.ac.at/:6542284","is_oa":true,"landing_page_url":"https://diglib.uibk.ac.at/doi/10.1007/s42484-020-00023-9","pdf_url":null,"source":{"id":"https://openalex.org/S4306401999","display_name":"Digital Library of the University of Innsbruck (University of Innsbruck)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I143397708","host_organization_name":"Innsbruck Medical University","host_organization_lineage":["https://openalex.org/I143397708"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"},{"id":"pmh:oai:pubmedcentral.nih.gov:7644479","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/7644479","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Quantum Mach Intell","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1007/s42484-020-00023-9","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s42484-020-00023-9","pdf_url":"https://link.springer.com/content/pdf/10.1007/s42484-020-00023-9.pdf","source":{"id":"https://openalex.org/S4210217596","display_name":"Quantum Machine Intelligence","issn_l":"2524-4906","issn":["2524-4906","2524-4914"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Quantum Machine Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1275353404","display_name":"Atoms, Light, and Molecules","funder_award_id":"W1259","funder_id":"https://openalex.org/F4320321181","funder_display_name":"Austrian Science Fund"},{"id":"https://openalex.org/G225723456","display_name":null,"funder_award_id":"DK-ALM:W1259-N27","funder_id":"https://openalex.org/F4320321181","funder_display_name":"Austrian Science Fund"},{"id":"https://openalex.org/G249262831","display_name":null,"funder_award_id":"024.003.037","funder_id":"https://openalex.org/F4320321800","funder_display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek"},{"id":"https://openalex.org/G420050720","display_name":null,"funder_award_id":"024.003","funder_id":"https://openalex.org/F4320321800","funder_display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek"},{"id":"https://openalex.org/G4861696250","display_name":null,"funder_award_id":"F7102","funder_id":"https://openalex.org/F4320321181","funder_display_name":"Austrian Science Fund"},{"id":"https://openalex.org/G841528331","display_name":null,"funder_award_id":"SFB FoQus F4212","funder_id":"https://openalex.org/F4320321181","funder_display_name":"Austrian Science Fund"},{"id":"https://openalex.org/G8595571843","display_name":null,"funder_award_id":"SFB BeyondC F7102","funder_id":"https://openalex.org/F4320321181","funder_display_name":"Austrian Science Fund"}],"funders":[{"id":"https://openalex.org/F4320321181","display_name":"Austrian Science Fund","ror":"https://ror.org/013tf3c58"},{"id":"https://openalex.org/F4320321800","display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek","ror":"https://ror.org/04jsz6e67"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2981787210.pdf","grobid_xml":"https://content.openalex.org/works/W2981787210.grobid-xml"},"referenced_works_count":20,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W1527117384","https://openalex.org/W1706310151","https://openalex.org/W1988148303","https://openalex.org/W1990514347","https://openalex.org/W2055845650","https://openalex.org/W2079905842","https://openalex.org/W2094000009","https://openalex.org/W2150339816","https://openalex.org/W2165131254","https://openalex.org/W2259729463","https://openalex.org/W2521267242","https://openalex.org/W2559394418","https://openalex.org/W2567223736","https://openalex.org/W2624637765","https://openalex.org/W2792315573","https://openalex.org/W2799011018","https://openalex.org/W2949945034","https://openalex.org/W2995202285","https://openalex.org/W3014809629"],"related_works":[],"abstract_inverted_index":{"In":[0,97],"recent":[1],"years,":[2],"the":[3,39,107,119],"interest":[4],"in":[5,30,59,93,133],"leveraging":[6],"quantum":[7,35,65],"effects":[8],"for":[9,38,90],"enhancing":[10],"machine":[11],"learning":[12,24,44,56,80,95,127,151],"tasks":[13],"has":[14],"significantly":[15],"increased.":[16],"Many":[17],"algorithms":[18],"speeding":[19],"up":[20],"supervised":[21],"and":[22],"unsupervised":[23],"were":[25,45],"established.":[26],"The":[27],"first":[28],"framework":[29],"which":[31,62],"ways":[32],"to":[33,130,149,154],"exploit":[34],"resources":[36],"specifically":[37],"broader":[40],"context":[41],"of":[42,71,106,109,118,137],"reinforcement":[43,55,79,126,150],"found":[46],"is":[47],"projective":[48,72,120],"simulation.":[49],"Projective":[50],"simulation":[51,73,121],"presents":[52],"an":[53],"agent-based":[54],"approach":[57,148],"designed":[58],"a":[60,102,125,134,145],"manner":[61],"may":[63],"support":[64],"walk-based":[66],"speedups.":[67],"Although":[68],"classical":[69],"variants":[70],"have":[74,87],"been":[75,88],"benchmarked":[76],"against":[77],"common":[78],"algorithms,":[81],"very":[82],"few":[83],"formal":[84,104],"theoretical":[85],"analyses":[86],"provided":[89],"its":[91],"performance":[92],"standard":[94],"scenarios.":[96],"this":[98,110],"paper,":[99],"we":[100,113],"provide":[101],"detailed":[103],"discussion":[105],"properties":[108],"model.":[111],"Specifically,":[112],"prove":[114],"that":[115,144],"one":[116],"version":[117],"model,":[122],"understood":[123],"as":[124],"approach,":[128],"converges":[129],"optimal":[131],"behavior":[132],"large":[135],"class":[136],"Markov":[138],"decision":[139],"processes.":[140],"This":[141],"proof":[142],"shows":[143],"physically":[146],"inspired":[147],"can":[152],"guarantee":[153],"converge.":[155]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2020,"cited_by_count":2}],"updated_date":"2026-05-21T09:19:25.381259","created_date":"2019-11-01T00:00:00"}
