{"id":"https://openalex.org/W7162070983","doi":"https://doi.org/10.48550/arxiv.2605.21768","title":"Memory-R2: Fair Credit Assignment for Long-Horizon Memory-Augmented LLM Agents","display_name":"Memory-R2: Fair Credit Assignment for Long-Horizon Memory-Augmented LLM Agents","publication_year":2026,"publication_date":"2026-05-20","ids":{"openalex":"https://openalex.org/W7162070983","doi":"https://doi.org/10.48550/arxiv.2605.21768"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.21768","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.21768","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.21768","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5132755543","display_name":"Sikuan Yan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yan, Sikuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120309421","display_name":"Ahmed Bahloul","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bahloul, Ahmed","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054893267","display_name":"Ercong Nie","orcid":"https://orcid.org/0000-0003-1453-4460"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nie, Ercong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040544054","display_name":"Susanna Schwarzmann","orcid":"https://orcid.org/0000-0002-3705-7559"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Schwarzmann, Susanna","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089631314","display_name":"Riccardo Trivisonno","orcid":"https://orcid.org/0000-0003-4190-5781"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Trivisonno, Riccardo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136755857","display_name":"Volker Tresp","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tresp, Volker","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136734582","display_name":"Yunpu Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ma, Yunpu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.6358000040054321,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.6358000040054321,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.06360000371932983,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.029500000178813934,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5848000049591064},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5641999840736389},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5467000007629395},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.4724000096321106},{"id":"https://openalex.org/keywords/core","display_name":"Core (optical fiber)","score":0.37119999527931213},{"id":"https://openalex.org/keywords/memory-management","display_name":"Memory management","score":0.36890000104904175},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.36419999599456787},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.34599998593330383}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7559999823570251},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5848000049591064},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5641999840736389},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5467000007629395},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.4724000096321106},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.37119999527931213},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.36890000104904175},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.36419999599456787},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.34599998593330383},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3409999907016754},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.33709999918937683},{"id":"https://openalex.org/C12186640","wikidata":"https://www.wikidata.org/wiki/Q6815743","display_name":"Memory model","level":3,"score":0.33469998836517334},{"id":"https://openalex.org/C82687282","wikidata":"https://www.wikidata.org/wiki/Q66221","display_name":"Auxiliary memory","level":2,"score":0.3093000054359436},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.2678999900817871},{"id":"https://openalex.org/C171675096","wikidata":"https://www.wikidata.org/wiki/Q1143380","display_name":"Extended memory","level":4,"score":0.265500009059906},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.26339998841285706},{"id":"https://openalex.org/C63511323","wikidata":"https://www.wikidata.org/wiki/Q908936","display_name":"Interleaved memory","level":4,"score":0.2624000012874603},{"id":"https://openalex.org/C74426580","wikidata":"https://www.wikidata.org/wiki/Q719484","display_name":"Memory map","level":3,"score":0.25920000672340393},{"id":"https://openalex.org/C4679612","wikidata":"https://www.wikidata.org/wiki/Q866298","display_name":"Aggregate (composite)","level":2,"score":0.25679999589920044}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.21768","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.21768","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.21768","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.21768","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Memory-augmented":[0],"LLM":[1,118,193,233],"agents":[2,22,234],"enable":[3],"interactions":[4],"that":[5,211],"extend":[6],"beyond":[7],"finite":[8],"context":[9],"windows":[10],"by":[11],"storing,":[12],"updating,":[13],"and":[14,126,157,172,184],"reusing":[15],"information":[16],"across":[17],"sessions.":[18,222],"However,":[19],"training":[20,113,214,229],"such":[21,75],"with":[23,175],"reinforcement":[24],"learning":[25,135],"in":[26,235],"multi-session":[27,237],"environments":[28],"is":[29],"challenging":[30],"because":[31],"memory":[32,60,103,151,162,170,173,186,204],"turns":[33],"the":[34,57,88,148,191,213],"agent's":[35],"past":[36],"actions":[37],"into":[38],"part":[39],"of":[40],"its":[41],"future":[42],"environment.":[43,91],"Once":[44],"different":[45,51,144],"rollouts":[46,79],"write,":[47],"update,":[48],"or":[49,97],"delete":[50],"memories,":[52],"they":[53,84],"no":[54],"longer":[55],"share":[56],"same":[58,89,149,192],"intermediate":[59,150],"state,":[61,152],"making":[62],"trajectory-level":[63,93,138],"comparisons":[64,156],"fundamentally":[65],"unfair.":[66],"This":[67],"violates":[68],"a":[69,112,176,181,185,208],"key":[70],"assumption":[71],"behind":[72],"group-relative":[73,128],"methods":[74],"as":[76,82],"GRPO,":[77],"where":[78,180],"are":[80,188],"compared":[81],"if":[83],"were":[85],"sampled":[86],"from":[87,136,147,190,216],"effective":[90,228],"Consequently,":[92],"rewards":[94],"provide":[95,226],"noisy":[96],"biased":[98],"credit":[99,165],"signals":[100],"for":[101,115,161,231],"long-horizon":[102,116,137,236],"operations.":[104],"To":[105,198],"address":[106],"this":[107],"challenge,":[108],"we":[109,206],"introduce":[110],"Memory-R2,":[111],"framework":[114],"memory-augmented":[117,232],"agents.":[119],"Its":[120],"core":[121],"algorithm,":[122],"LoGo-GRPO,":[123],"combines":[124],"local":[125,141],"global":[127,131],"optimization.":[129],"The":[130],"objective":[132],"preserves":[133],"end-to-end":[134],"rewards,":[139],"while":[140],"rerollouts":[142],"compare":[143],"memory-operation":[145],"outcomes":[146],"yielding":[153],"fairer":[154],"group":[155],"more":[158],"precise":[159],"supervision":[160],"construction.":[163],"Beyond":[164],"assignment,":[166],"Memory-R2":[167],"jointly":[168],"optimizes":[169],"formation":[171],"evolution":[174],"shared-parameter":[177],"co-learning":[178],"design,":[179],"fact":[182],"extractor":[183],"manager":[187],"instantiated":[189],"backbone":[194],"through":[195],"role-specific":[196],"prompts.":[197],"stabilize":[199],"multi-step":[200],"RL":[201],"over":[202],"long":[203],"horizons,":[205],"adopt":[207],"progressive":[209],"curriculum":[210],"increases":[212],"horizon":[215],"8":[217],"to":[218,220],"16":[219],"32":[221],"Together,":[223],"these":[224],"components":[225],"an":[227],"paradigm":[230],"settings.":[238]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-23T00:00:00"}
