{"id":"https://openalex.org/W7140972647","doi":"https://doi.org/10.48550/arxiv.2603.25138","title":"Reinforcement learning for quantum processes with memory","display_name":"Reinforcement learning for quantum processes with memory","publication_year":2026,"publication_date":"2026-03-26","ids":{"openalex":"https://openalex.org/W7140972647","doi":"https://doi.org/10.48550/arxiv.2603.25138"},"language":"en","primary_location":{"id":"pmh:oai:HAL:hal-05632999v1","is_oa":false,"landing_page_url":"https://hal.science/hal-05632999","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"2026","raw_type":"Preprints, Working Papers, ..."},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.25138","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5076789740","display_name":"Josep Lumbreras","orcid":"https://orcid.org/0009-0002-3541-6148"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lumbreras, Josep","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045430230","display_name":"Ruo Cheng Huang","orcid":"https://orcid.org/0000-0001-8135-8693"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Ruo Cheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103192677","display_name":"Yanglin Hu","orcid":"https://orcid.org/0009-0000-1105-589X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hu, Yanglin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042293106","display_name":"Marco Fanizza","orcid":"https://orcid.org/0000-0003-0802-8000"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fanizza, Marco","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5109521107","display_name":"Mile Gu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gu, Mile","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11520","display_name":"Advanced Thermodynamics and Statistical Mechanics","score":0.4228000044822693,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11520","display_name":"Advanced Thermodynamics and Statistical Mechanics","score":0.4228000044822693,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10682","display_name":"Quantum Computing Algorithms and Architecture","score":0.1307000070810318,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.12330000102519989,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.746399998664856},{"id":"https://openalex.org/keywords/regret","display_name":"Regret","score":0.5878000259399414},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.5562999844551086},{"id":"https://openalex.org/keywords/quantum","display_name":"Quantum","score":0.5273000001907349},{"id":"https://openalex.org/keywords/sublinear-function","display_name":"Sublinear function","score":0.5250999927520752},{"id":"https://openalex.org/keywords/quantum-algorithm","display_name":"Quantum algorithm","score":0.4311999976634979},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.41119998693466187},{"id":"https://openalex.org/keywords/quantum-state","display_name":"Quantum state","score":0.4059999883174896}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.746399998664856},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6255000233650208},{"id":"https://openalex.org/C50817715","wikidata":"https://www.wikidata.org/wiki/Q79895177","display_name":"Regret","level":2,"score":0.5878000259399414},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.5562999844551086},{"id":"https://openalex.org/C84114770","wikidata":"https://www.wikidata.org/wiki/Q46344","display_name":"Quantum","level":2,"score":0.5273000001907349},{"id":"https://openalex.org/C117160843","wikidata":"https://www.wikidata.org/wiki/Q338652","display_name":"Sublinear function","level":2,"score":0.5250999927520752},{"id":"https://openalex.org/C137019171","wikidata":"https://www.wikidata.org/wiki/Q2623817","display_name":"Quantum algorithm","level":3,"score":0.4311999976634979},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.41119998693466187},{"id":"https://openalex.org/C15706264","wikidata":"https://www.wikidata.org/wiki/Q230883","display_name":"Quantum state","level":3,"score":0.4059999883174896},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3849000036716461},{"id":"https://openalex.org/C2778926657","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum system","level":3,"score":0.34630000591278076},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.3452000021934509},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.325300008058548},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.32199999690055847},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.31839999556541443},{"id":"https://openalex.org/C141513077","wikidata":"https://www.wikidata.org/wiki/Q378542","display_name":"Independent and identically distributed random variables","level":3,"score":0.3165000081062317},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.30889999866485596},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.29580000042915344},{"id":"https://openalex.org/C186370098","wikidata":"https://www.wikidata.org/wiki/Q442787","display_name":"Energy (signal processing)","level":2,"score":0.29429998993873596},{"id":"https://openalex.org/C121864883","wikidata":"https://www.wikidata.org/wiki/Q677916","display_name":"Statistical physics","level":1,"score":0.2921000123023987},{"id":"https://openalex.org/C11255438","wikidata":"https://www.wikidata.org/wiki/Q7269085","display_name":"Quantum process","level":4,"score":0.28790000081062317},{"id":"https://openalex.org/C58053490","wikidata":"https://www.wikidata.org/wiki/Q176555","display_name":"Quantum computer","level":3,"score":0.2770000100135803},{"id":"https://openalex.org/C122123141","wikidata":"https://www.wikidata.org/wiki/Q176623","display_name":"Random variable","level":2,"score":0.26969999074935913},{"id":"https://openalex.org/C8272713","wikidata":"https://www.wikidata.org/wiki/Q176737","display_name":"Stochastic process","level":2,"score":0.2635999917984009},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.260699987411499}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:HAL:hal-05632999v1","is_oa":false,"landing_page_url":"https://hal.science/hal-05632999","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"2026","raw_type":"Preprints, Working Papers, ..."},{"id":"doi:10.48550/arxiv.2603.25138","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.25138","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.25138","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.25138","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"In":[0,209],"reinforcement":[1],"learning,":[2],"an":[3,8,97,244],"agent":[4,26,86,225],"interacts":[5],"sequentially":[6,88],"with":[7,55],"environment":[9,73],"to":[10,29,38,50,106,112,150,170,206,230],"maximize":[11,39],"a":[12,21,69,75,148,174,187,195],"reward,":[13],"receiving":[14],"only":[15],"partial,":[16],"probabilistic":[17],"feedback.":[18],"This":[19],"creates":[20],"fundamental":[22],"exploration-exploitation":[23],"trade-off:":[24],"the":[25,31,72,85,104,121,134,151,203,212,224,236],"must":[27],"explore":[28],"learn":[30],"hidden":[32,56,76,196],"dynamics":[33],"while":[34],"exploiting":[35],"this":[36,48,66,93,163,217],"knowledge":[37,201],"its":[40,232],"target":[41],"objective.":[42],"While":[43],"extensively":[44],"studied":[45],"classically,":[46],"applying":[47],"framework":[49,70],"quantum":[51,57,77,82,90,127,153,191],"systems":[52],"requires":[53],"dealing":[54],"states":[58,192],"that":[59,133,162],"evolve":[60],"via":[61,68,80,147],"unknown":[62,81],"dynamics.":[63],"We":[64,102],"formalize":[65],"problem":[67],"where":[71],"maintains":[74],"memory":[78],"evolving":[79],"channels,":[83],"and":[84,129],"intervenes":[87],"using":[89],"instruments.":[91],"For":[92],"setting,":[94,211],"we":[95,131,156,177],"adapt":[96],"optimistic":[98],"maximum-likelihood":[99],"estimation":[100,124],"algorithm.":[101],"extend":[103],"analysis":[105],"continuous":[107],"action":[108],"spaces,":[109],"allowing":[110],"us":[111],"model":[113],"general":[114],"positive":[115],"operator-valued":[116],"measures":[117],"(POVMs).":[118],"By":[119],"controlling":[120],"propagation":[122],"of":[123,137,189,200],"errors":[125],"through":[126],"channels":[128],"instruments,":[130],"prove":[132],"cumulative":[135,218,240],"regret":[136,214],"our":[138,210,221],"strategy":[139],"scales":[140],"as":[141],"$\\widetilde{\\mathcal{O}}(\\sqrt{K})$":[142],"over":[143],"$K$":[144],"episodes.":[145],"Furthermore,":[146],"reduction":[149],"multi-armed":[152],"bandit":[154],"problem,":[155],"establish":[157],"information-theoretic":[158],"lower":[159],"bounds":[160],"demonstrating":[161],"sublinear":[164,239],"scaling":[165],"is":[166],"strictly":[167],"optimal":[168],"up":[169],"polylogarithmic":[171],"factors.":[172],"As":[173],"physical":[175],"application,":[176],"consider":[178],"state-agnostic":[179],"work":[180],"extraction.":[181],"When":[182],"extracting":[183],"free":[184],"energy":[185,228],"from":[186],"sequence":[188],"non-i.i.d.":[190],"correlated":[193],"by":[194],"memory,":[197],"any":[198],"lack":[199],"about":[202],"source":[204],"leads":[205],"thermodynamic":[207],"dissipation.":[208,219],"mathematical":[213],"exactly":[215],"quantifies":[216],"Using":[220],"adaptive":[222],"algorithm,":[223],"uses":[226],"past":[227],"outcomes":[229],"improve":[231],"extraction":[233],"protocol":[234],"on":[235],"fly,":[237],"achieving":[238],"dissipation,":[241],"and,":[242],"consequently,":[243],"asymptotically":[245],"zero":[246],"dissipation":[247],"rate.":[248]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-28T00:00:00"}
