{"id":"https://openalex.org/W4416008204","doi":"https://doi.org/10.1007/s10994-025-06912-z","title":"Least-squares temporal difference with expected eligibility traces","display_name":"Least-squares temporal difference with expected eligibility traces","publication_year":2025,"publication_date":"2025-11-07","ids":{"openalex":"https://openalex.org/W4416008204","doi":"https://doi.org/10.1007/s10994-025-06912-z"},"language":"en","primary_location":{"id":"doi:10.1007/s10994-025-06912-z","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-025-06912-z","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-025-06912-z.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s10994-025-06912-z.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5024959253","display_name":"Roy van Zuijlen","orcid":null},"institutions":[{"id":"https://openalex.org/I83019370","display_name":"Eindhoven University of Technology","ror":"https://ror.org/02c2kyt77","country_code":"NL","type":"education","lineage":["https://openalex.org/I83019370"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Roy van Zuijlen","raw_affiliation_strings":["Control Systems Technology Section, Department of Mechanical Engineering, Eindhoven University of Technology, Eindhoven, The Netherlands"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Control Systems Technology Section, Department of Mechanical Engineering, Eindhoven University of Technology, Eindhoven, The Netherlands","institution_ids":["https://openalex.org/I83019370"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5013274345","display_name":"Duarte Antunes","orcid":"https://orcid.org/0000-0003-3047-9334"},"institutions":[{"id":"https://openalex.org/I83019370","display_name":"Eindhoven University of Technology","ror":"https://ror.org/02c2kyt77","country_code":"NL","type":"education","lineage":["https://openalex.org/I83019370"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Duarte Antunes","raw_affiliation_strings":["Control Systems Technology Section, Department of Mechanical Engineering, Eindhoven University of Technology, Eindhoven, The Netherlands"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Control Systems Technology Section, Department of Mechanical Engineering, Eindhoven University of Technology, Eindhoven, The Netherlands","institution_ids":["https://openalex.org/I83019370"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5024959253"],"corresponding_institution_ids":["https://openalex.org/I83019370"],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":{"value":2390,"currency":"EUR","value_usd":2990},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.15470122,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"114","issue":"12","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11303","display_name":"Bayesian Modeling and Causal Inference","score":0.13079999387264252,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11303","display_name":"Bayesian Modeling and Causal Inference","score":0.13079999387264252,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10711","display_name":"Target Tracking and Data Fusion in Sensor Networks","score":0.04699999839067459,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11596","display_name":"Constraint Satisfaction and Optimization","score":0.03999999910593033,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6269000172615051},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.6173999905586243},{"id":"https://openalex.org/keywords/limit","display_name":"Limit (mathematics)","score":0.5776000022888184},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.5443000197410583},{"id":"https://openalex.org/keywords/temporal-difference-learning","display_name":"Temporal difference learning","score":0.5306000113487244},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5153999924659729},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.49549999833106995},{"id":"https://openalex.org/keywords/value","display_name":"Value (mathematics)","score":0.4634999930858612}],"concepts":[{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6269000172615051},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.6173999905586243},{"id":"https://openalex.org/C151201525","wikidata":"https://www.wikidata.org/wiki/Q177239","display_name":"Limit (mathematics)","level":2,"score":0.5776000022888184},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.5443000197410583},{"id":"https://openalex.org/C196340769","wikidata":"https://www.wikidata.org/wiki/Q7698910","display_name":"Temporal difference learning","level":3,"score":0.5306000113487244},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5153999924659729},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.49549999833106995},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.46779999136924744},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.4634999930858612},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.40799999237060547},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.38839998841285706},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.3871000111103058},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.38440001010894775},{"id":"https://openalex.org/C2778029271","wikidata":"https://www.wikidata.org/wiki/Q5421931","display_name":"Extension (predicate logic)","level":2,"score":0.34700000286102295},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.32440000772476196},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.32019999623298645},{"id":"https://openalex.org/C2778067643","wikidata":"https://www.wikidata.org/wiki/Q166507","display_name":"Interval (graph theory)","level":2,"score":0.30399999022483826},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.3021000027656555},{"id":"https://openalex.org/C141042865","wikidata":"https://www.wikidata.org/wiki/Q200125","display_name":"Expected value","level":2,"score":0.28600001335144043},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.2750000059604645},{"id":"https://openalex.org/C205706631","wikidata":"https://www.wikidata.org/wiki/Q2319304","display_name":"Expected utility hypothesis","level":2,"score":0.2703000009059906},{"id":"https://openalex.org/C163836022","wikidata":"https://www.wikidata.org/wiki/Q6771326","display_name":"Markov model","level":3,"score":0.2669000029563904}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1007/s10994-025-06912-z","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-025-06912-z","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-025-06912-z.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},{"id":"pmh:oai:pure.tue.nl:openaire_cris_publications/26a5da1f-b9cc-4c64-9cd2-b77a6f342094","is_oa":true,"landing_page_url":"https://research.tue.nl/en/publications/26a5da1f-b9cc-4c64-9cd2-b77a6f342094","pdf_url":"https://pure.tue.nl/ws/files/371159683/2025_-_van_Zuijlen_-_Least-squares_temporal_difference_with_expected_eligibility_traces.pdf","source":{"id":"https://openalex.org/S4406922641","display_name":"TU/e Research Portal","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"van Zuijlen, R A C & Guerreiro Tom\u00e9 Antunes, D J 2025, 'Least-squares temporal difference with expected eligibility traces', Machine Learning, vol. 114, no. 12, 269, pp. 1-21. https://doi.org/10.1007/s10994-025-06912-z","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"doi:10.1007/s10994-025-06912-z","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-025-06912-z","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-025-06912-z.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416008204.pdf"},"referenced_works_count":11,"referenced_works":["https://openalex.org/W1507222174","https://openalex.org/W2019172585","https://openalex.org/W2056354534","https://openalex.org/W2072931156","https://openalex.org/W2123979492","https://openalex.org/W2132351269","https://openalex.org/W2141022000","https://openalex.org/W2665103848","https://openalex.org/W3041202696","https://openalex.org/W3173852058","https://openalex.org/W4288598445"],"related_works":[],"abstract_inverted_index":{"Abstract":[0],"Temporal":[1,6],"Difference":[2,7],"(TD)":[3],"and":[4,57,64,78,199,247,260,285,296],"Least-Squares":[5],"(LSTD)":[8],"are":[9],"related":[10],"methods":[11,255],"to":[12,32,48,102,140,167,176,216,292],"estimate":[13],"the":[14,34,71,121,124,149,160,169,231,234,240,243,248,271,275,280],"value":[15,35,72,289],"function":[16,36],"of":[17,86,113,151,165,195,233,274,294],"a":[18,26,40,50,184,193,223],"Markov":[19],"Decision":[20],"Process":[21],"(MDP).":[22],"While":[23,69,127],"TD":[24,63],"is":[25,39,174,214],"direct":[27,161],"method":[28,44],"using":[29,45],"local":[30],"data":[31,47,146],"update":[33],"estimate,":[37],"LSTD":[38,65,166,177,208],"Bellman":[41],"projected":[42],"equation":[43],"full":[46,145],"compute":[49],"one-time":[51],"estimate.":[52],"TD(":[53,74,98],"$$\\lambda":[54,59,75,80,99,218,225,257,265,298],"$$":[55,60,76,81,100,220,227,258,263,266,300],")":[56,61,77,82,101,221,259],"LSTD(":[58,79,217,297],"extend":[62],"with":[66,144,159,209],"eligibility":[67,92,172,179,191,197,201,212,235,245,276,283],"traces.":[68,89,202],"estimating":[70],"function,":[73],"use":[83],"actual":[84,106],"histories":[85,112],"features":[87,114],"as":[88,237,239],"Recently,":[90],"expected":[91,171,196,244,282],"traces":[93,173,180,198,213,277],"have":[94,117],"been":[95],"proposed":[96],"for":[97,132,222],"not":[103],"only":[104],"include":[105,168],"histories,":[107],"but":[108],"also":[109],"all":[110],"potential":[111],"that":[115,207,228,287],"could":[116],"occurred":[118],"based":[119],"on":[120,270],"model":[122],"or":[123],"available":[125],"data.":[126],"this":[128],"idea":[129],"can":[130],"account":[131],"non-linear":[133],"feature":[134,142],"architectures,":[135],"here":[136],"we":[137,188,205,252],"limit":[138],"ourselves":[139],"linear":[141],"architectures":[143],"updates":[147],"in":[148,156],"context":[150],"LSTD.":[152],"We":[153,182],"show":[154,206,286],"that,":[155],"striking":[157],"contrast":[158],"versions,":[162],"an":[163],"extension":[164],"theoretical":[170,210,281],"equivalent":[175,215],"without":[178],"(LSTD(0)).":[181],"obtain":[183],"similar":[185],"result":[186],"if":[187],"consider":[189,253],"mixed":[190,211],"traces;":[192],"combination":[194],"ordinary":[200,249],"In":[203],"fact,":[204],"^\\prime":[219,226,299],"given":[224],"captures":[229],"both":[230],"decay":[232],"trace,":[236],"well":[238],"balance":[241],"between":[242],"trace":[246],"trace.":[250],"Furthermore,":[251],"alternative":[254],"LSET(":[256,261],"$$\\eta":[262],",":[264],"),":[267],"which":[268],"rely":[269],"empirical":[272],"means":[273],"rather":[278],"than":[279],"traces,":[284],"their":[288],"estimates":[290],"converges":[291],"those":[293],"LSTD(0)":[295],").":[301]},"counts_by_year":[],"updated_date":"2026-06-13T07:54:00.901334","created_date":"2025-11-07T00:00:00"}
