{"id":"https://openalex.org/W4399427224","doi":"https://doi.org/10.1007/s10994-024-06564-5","title":"The impact of data distribution on Q-learning with function approximation","display_name":"The impact of data distribution on Q-learning with function approximation","publication_year":2024,"publication_date":"2024-06-07","ids":{"openalex":"https://openalex.org/W4399427224","doi":"https://doi.org/10.1007/s10994-024-06564-5"},"language":"en","primary_location":{"id":"doi:10.1007/s10994-024-06564-5","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-024-06564-5","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-024-06564-5.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s10994-024-06564-5.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5075446522","display_name":"Pedro P. Santos","orcid":"https://orcid.org/0000-0002-4587-9528"},"institutions":[{"id":"https://openalex.org/I121345201","display_name":"Instituto de Engenharia de Sistemas e Computadores Investiga\u00e7\u00e3o e Desenvolvimento","ror":"https://ror.org/04mqy3p58","country_code":"PT","type":"nonprofit","lineage":["https://openalex.org/I121345201","https://openalex.org/I4210125590"]},{"id":"https://openalex.org/I4387152517","display_name":"Instituto Superior T\u00e9cnico","ror":"https://ror.org/03db2by73","country_code":null,"type":"education","lineage":["https://openalex.org/I141596103","https://openalex.org/I4387152517"]}],"countries":["PT"],"is_corresponding":true,"raw_author_name":"Pedro P. Santos","raw_affiliation_strings":["INESC-ID, Lisbon, Portugal","Instituto Superior T\u00e9cnico, Lisbon, Portugal"],"affiliations":[{"raw_affiliation_string":"INESC-ID, Lisbon, Portugal","institution_ids":["https://openalex.org/I121345201"]},{"raw_affiliation_string":"Instituto Superior T\u00e9cnico, Lisbon, Portugal","institution_ids":["https://openalex.org/I4387152517"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009216751","display_name":"Diogo S. Carvalho","orcid":"https://orcid.org/0000-0003-3008-7322"},"institutions":[{"id":"https://openalex.org/I121345201","display_name":"Instituto de Engenharia de Sistemas e Computadores Investiga\u00e7\u00e3o e Desenvolvimento","ror":"https://ror.org/04mqy3p58","country_code":"PT","type":"nonprofit","lineage":["https://openalex.org/I121345201","https://openalex.org/I4210125590"]},{"id":"https://openalex.org/I4387152517","display_name":"Instituto Superior T\u00e9cnico","ror":"https://ror.org/03db2by73","country_code":null,"type":"education","lineage":["https://openalex.org/I141596103","https://openalex.org/I4387152517"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Diogo S. Carvalho","raw_affiliation_strings":["INESC-ID, Lisbon, Portugal","Instituto Superior T\u00e9cnico, Lisbon, Portugal"],"affiliations":[{"raw_affiliation_string":"INESC-ID, Lisbon, Portugal","institution_ids":["https://openalex.org/I121345201"]},{"raw_affiliation_string":"Instituto Superior T\u00e9cnico, Lisbon, Portugal","institution_ids":["https://openalex.org/I4387152517"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091668153","display_name":"Alberto Sardinha","orcid":"https://orcid.org/0000-0002-5782-3142"},"institutions":[{"id":"https://openalex.org/I121345201","display_name":"Instituto de Engenharia de Sistemas e Computadores Investiga\u00e7\u00e3o e Desenvolvimento","ror":"https://ror.org/04mqy3p58","country_code":"PT","type":"nonprofit","lineage":["https://openalex.org/I121345201","https://openalex.org/I4210125590"]},{"id":"https://openalex.org/I2699952","display_name":"Pontifical Catholic University of Rio de Janeiro","ror":"https://ror.org/01dg47b60","country_code":"BR","type":"education","lineage":["https://openalex.org/I2699952"]}],"countries":["BR","PT"],"is_corresponding":false,"raw_author_name":"Alberto Sardinha","raw_affiliation_strings":["INESC-ID, Lisbon, Portugal","Pontifical Catholic University of Rio de Janeiro, Rio de Janeiro, Brazil"],"affiliations":[{"raw_affiliation_string":"INESC-ID, Lisbon, Portugal","institution_ids":["https://openalex.org/I121345201"]},{"raw_affiliation_string":"Pontifical Catholic University of Rio de Janeiro, Rio de Janeiro, Brazil","institution_ids":["https://openalex.org/I2699952"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101616573","display_name":"Francisco S. Melo","orcid":"https://orcid.org/0000-0001-5705-7372"},"institutions":[{"id":"https://openalex.org/I4387152517","display_name":"Instituto Superior T\u00e9cnico","ror":"https://ror.org/03db2by73","country_code":null,"type":"education","lineage":["https://openalex.org/I141596103","https://openalex.org/I4387152517"]},{"id":"https://openalex.org/I121345201","display_name":"Instituto de Engenharia de Sistemas e Computadores Investiga\u00e7\u00e3o e Desenvolvimento","ror":"https://ror.org/04mqy3p58","country_code":"PT","type":"nonprofit","lineage":["https://openalex.org/I121345201","https://openalex.org/I4210125590"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Francisco S. Melo","raw_affiliation_strings":["INESC-ID, Lisbon, Portugal","Instituto Superior T\u00e9cnico, Lisbon, Portugal"],"affiliations":[{"raw_affiliation_string":"INESC-ID, Lisbon, Portugal","institution_ids":["https://openalex.org/I121345201"]},{"raw_affiliation_string":"Instituto Superior T\u00e9cnico, Lisbon, Portugal","institution_ids":["https://openalex.org/I4387152517"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5075446522"],"corresponding_institution_ids":["https://openalex.org/I121345201","https://openalex.org/I4387152517"],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":{"value":2390,"currency":"EUR","value_usd":2990},"fwci":1.6237,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.86163189,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":"113","issue":"9","first_page":"6141","last_page":"6163"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10270","display_name":"Blockchain Technology Applications and Security","score":0.987500011920929,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10270","display_name":"Blockchain Technology Applications and Security","score":0.987500011920929,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9864000082015991,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13553","display_name":"Age of Information Optimization","score":0.9767000079154968,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closeness","display_name":"Closeness","score":0.6410390734672546},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.629889965057373},{"id":"https://openalex.org/keywords/empirical-distribution-function","display_name":"Empirical distribution function","score":0.531893253326416},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.529144823551178},{"id":"https://openalex.org/keywords/distribution","display_name":"Distribution (mathematics)","score":0.4375017285346985},{"id":"https://openalex.org/keywords/data-point","display_name":"Data point","score":0.4300835430622101},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.41225147247314453},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3828631639480591},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.381415992975235},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3531913757324219},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.33791303634643555},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.29277652502059937},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.12319299578666687}],"concepts":[{"id":"https://openalex.org/C2779545769","wikidata":"https://www.wikidata.org/wiki/Q5135364","display_name":"Closeness","level":2,"score":0.6410390734672546},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.629889965057373},{"id":"https://openalex.org/C98385598","wikidata":"https://www.wikidata.org/wiki/Q1339385","display_name":"Empirical distribution function","level":2,"score":0.531893253326416},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.529144823551178},{"id":"https://openalex.org/C110121322","wikidata":"https://www.wikidata.org/wiki/Q865811","display_name":"Distribution (mathematics)","level":2,"score":0.4375017285346985},{"id":"https://openalex.org/C21080849","wikidata":"https://www.wikidata.org/wiki/Q13611879","display_name":"Data point","level":2,"score":0.4300835430622101},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.41225147247314453},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3828631639480591},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.381415992975235},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3531913757324219},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.33791303634643555},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.29277652502059937},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.12319299578666687},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s10994-024-06564-5","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-024-06564-5","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-024-06564-5.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s10994-024-06564-5","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10994-024-06564-5","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10994-024-06564-5.pdf","source":{"id":"https://openalex.org/S62148650","display_name":"Machine Learning","issn_l":"0885-6125","issn":["0885-6125","1573-0565"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4103945517","display_name":null,"funder_award_id":"952215","funder_id":"https://openalex.org/F4320335254","funder_display_name":"Horizon 2020"},{"id":"https://openalex.org/G5111356591","display_name":null,"funder_award_id":"FA9550-22-1-0475","funder_id":"https://openalex.org/F4320338279","funder_display_name":"Air Force Office of Scientific Research"}],"funders":[{"id":"https://openalex.org/F4320323424","display_name":"Universidade de Lisboa","ror":"https://ror.org/01c27hj86"},{"id":"https://openalex.org/F4320335254","display_name":"Horizon 2020","ror":null},{"id":"https://openalex.org/F4320338279","display_name":"Air Force Office of Scientific Research","ror":"https://ror.org/011e9bt93"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4399427224.pdf"},"referenced_works_count":59,"referenced_works":["https://openalex.org/W166862392","https://openalex.org/W1575592356","https://openalex.org/W1646707810","https://openalex.org/W2012423427","https://openalex.org/W2012547817","https://openalex.org/W2017697298","https://openalex.org/W2104753538","https://openalex.org/W2128812357","https://openalex.org/W2130599357","https://openalex.org/W2131940723","https://openalex.org/W2139418546","https://openalex.org/W2145339207","https://openalex.org/W2169982856","https://openalex.org/W2173248099","https://openalex.org/W2301746362","https://openalex.org/W2397607997","https://openalex.org/W2545659366","https://openalex.org/W2766447205","https://openalex.org/W2900757704","https://openalex.org/W2902098903","https://openalex.org/W2942709080","https://openalex.org/W2947150733","https://openalex.org/W2990138404","https://openalex.org/W3009584650","https://openalex.org/W3012148463","https://openalex.org/W3016525976","https://openalex.org/W3022566517","https://openalex.org/W3032773894","https://openalex.org/W3033324992","https://openalex.org/W3037024314","https://openalex.org/W3048911479","https://openalex.org/W3097374585","https://openalex.org/W3100789280","https://openalex.org/W3101408474","https://openalex.org/W3113240575","https://openalex.org/W3128004107","https://openalex.org/W3128328080","https://openalex.org/W3134971306","https://openalex.org/W3180887884","https://openalex.org/W3187722890","https://openalex.org/W3213037073","https://openalex.org/W4214717370","https://openalex.org/W4221146510","https://openalex.org/W4221164312","https://openalex.org/W4250589301","https://openalex.org/W4298023569","https://openalex.org/W6604666559","https://openalex.org/W6637597983","https://openalex.org/W6677549092","https://openalex.org/W6682433095","https://openalex.org/W6741821246","https://openalex.org/W6757058172","https://openalex.org/W6760755450","https://openalex.org/W6765240361","https://openalex.org/W6785328086","https://openalex.org/W6788944699","https://openalex.org/W6846149835","https://openalex.org/W6854236268","https://openalex.org/W7001894244"],"related_works":["https://openalex.org/W2156910174","https://openalex.org/W1995054232","https://openalex.org/W2011510925","https://openalex.org/W1557920161","https://openalex.org/W1556709767","https://openalex.org/W1993023208","https://openalex.org/W4291020658","https://openalex.org/W2593813644","https://openalex.org/W2061476331","https://openalex.org/W2151215270"],"abstract_inverted_index":{"Abstract":[0],"We":[1,16,40,85],"study":[2],"the":[3,6,30,34,63,66,101,106,127,130,134,147,150,155,170,176],"interplay":[4],"between":[5],"data":[7,31,67,79,103,131,151,177,184],"distribution":[8,32,68,104,132,152,178],"and":[9,21,50,95,183],"Q":[10,37,137,160],"-learning-based":[11,38,138,161],"algorithms":[12,139,162],"with":[13,140],"function":[14,141],"approximation.":[15,142],"provide":[17],"a":[18,73,90,119],"unified":[19],"theoretical":[20],"empirical":[22,115],"analysis":[23],"as":[24,46,48,72,180],"to":[25,75,99,125,169,187],"how":[26],"different":[27,42,164,173],"properties":[28,153,174],"of":[29,36,44,65,93,108,110,129,136,149,157,172,175],"influence":[33],"performance":[35,135,156],"algorithms.":[39],"connect":[41],"lines":[43],"research,":[45],"well":[47],"validate":[49],"extend":[51],"previous":[52],"results,":[53],"being":[54],"primarily":[55],"focused":[56],"on":[57,154],"offline":[58,159],"settings.":[59],"First,":[60],"we":[61,117,144],"analyze":[62],"impact":[64,128,148],"by":[69],"using":[70],"optimization":[71],"tool":[74],"better":[76],"understand":[77],"which":[78],"distributions":[80,88],"yield":[81],"low":[82],"concentrability":[83],"coefficients.":[84],"motivate":[86],"high-entropy":[87],"from":[89,105,113],"game-theoretical":[91],"point":[92,107],"view":[94,109],"propose":[96],"an":[97,114],"algorithm":[98],"find":[100],"optimal":[102,188],"concentrability.":[111],"Second,":[112],"perspective,":[116],"introduce":[118],"novel":[120],"four-state":[121],"MDP":[122],"specifically":[123],"tailored":[124],"highlight":[126],"in":[133],"Finally,":[143],"experimentally":[145],"assess":[146],"two":[158],"under":[163],"environments.":[165],"Our":[166],"results":[167],"attest":[168],"importance":[171],"such":[179],"entropy,":[181],"coverage,":[182],"quality":[185],"(closeness":[186],"policy).":[189]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
