{"id":"https://openalex.org/W4410636407","doi":"https://doi.org/10.1145/3701716.3715224","title":"FAST-Q: Fast-track Exploration with Adversarially Balanced State Representations for Counterfactual Action Estimation in Offline Reinforcement Learning","display_name":"FAST-Q: Fast-track Exploration with Adversarially Balanced State Representations for Counterfactual Action Estimation in Offline Reinforcement Learning","publication_year":2025,"publication_date":"2025-05-08","ids":{"openalex":"https://openalex.org/W4410636407","doi":"https://doi.org/10.1145/3701716.3715224"},"language":"en","primary_location":{"id":"doi:10.1145/3701716.3715224","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3701716.3715224","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3701716.3715224","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion Proceedings of the ACM on Web Conference 2025","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3701716.3715224","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5093643177","display_name":"Pulkit Agrawal","orcid":"https://orcid.org/0009-0009-5786-8891"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Pulkit Agrawal","raw_affiliation_strings":["Games24x7, Bengaluru, India"],"raw_orcid":"https://orcid.org/0009-0009-5786-8891","affiliations":[{"raw_affiliation_string":"Games24x7, Bengaluru, India","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069986100","display_name":"Rukma Talwadker","orcid":"https://orcid.org/0000-0002-1551-9679"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rukma Talwadker","raw_affiliation_strings":["Games24x7, Bengaluru, India"],"raw_orcid":"https://orcid.org/0000-0002-1551-9679","affiliations":[{"raw_affiliation_string":"Games24x7, Bengaluru, India","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075004641","display_name":"Aditya Pareek","orcid":"https://orcid.org/0009-0000-9079-5940"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Aditya Pareek","raw_affiliation_strings":["Games24x7, Bengaluru, India"],"raw_orcid":"https://orcid.org/0009-0000-9079-5940","affiliations":[{"raw_affiliation_string":"Games24x7, Bengaluru, India","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5063253011","display_name":"Tridib Mukherjee","orcid":"https://orcid.org/0009-0009-2385-1290"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tridib Mukherjee","raw_affiliation_strings":["Games24x7, Bengaluru, India"],"raw_orcid":"https://orcid.org/0009-0009-2385-1290","affiliations":[{"raw_affiliation_string":"Games24x7, Bengaluru, India","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5093643177"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.05025015,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"85","last_page":"94"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9876000285148621,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9458000063896179,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8603487610816956},{"id":"https://openalex.org/keywords/counterfactual-thinking","display_name":"Counterfactual thinking","score":0.8322597742080688},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.702224850654602},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5742977857589722},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.569103479385376},{"id":"https://openalex.org/keywords/track","display_name":"Track (disk drive)","score":0.532824695110321},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.4906705617904663},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.48101454973220825},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.4681200087070465},{"id":"https://openalex.org/keywords/estimation","display_name":"Estimation","score":0.45296764373779297},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.18403026461601257},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1046212911605835},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.062132447957992554}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8603487610816956},{"id":"https://openalex.org/C108650721","wikidata":"https://www.wikidata.org/wiki/Q1783253","display_name":"Counterfactual thinking","level":2,"score":0.8322597742080688},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.702224850654602},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5742977857589722},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.569103479385376},{"id":"https://openalex.org/C89992363","wikidata":"https://www.wikidata.org/wiki/Q5961558","display_name":"Track (disk drive)","level":2,"score":0.532824695110321},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.4906705617904663},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.48101454973220825},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.4681200087070465},{"id":"https://openalex.org/C96250715","wikidata":"https://www.wikidata.org/wiki/Q965330","display_name":"Estimation","level":2,"score":0.45296764373779297},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.18403026461601257},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1046212911605835},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.062132447957992554},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3701716.3715224","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3701716.3715224","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3701716.3715224","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion Proceedings of the ACM on Web Conference 2025","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2504.21383","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2504.21383","pdf_url":"https://arxiv.org/pdf/2504.21383","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"doi:10.1145/3701716.3715224","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3701716.3715224","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3701716.3715224","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion Proceedings of the ACM on Web Conference 2025","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4410636407.pdf","grobid_xml":"https://content.openalex.org/works/W4410636407.grobid-xml"},"referenced_works_count":14,"referenced_works":["https://openalex.org/W398859631","https://openalex.org/W1731081199","https://openalex.org/W1998649829","https://openalex.org/W2131953535","https://openalex.org/W2141481921","https://openalex.org/W2158782408","https://openalex.org/W2782696945","https://openalex.org/W2963842088","https://openalex.org/W3021693191","https://openalex.org/W3080169369","https://openalex.org/W4214717370","https://openalex.org/W4247105055","https://openalex.org/W4290943388","https://openalex.org/W4390532433"],"related_works":["https://openalex.org/W3201448254","https://openalex.org/W4286970243","https://openalex.org/W2066431708","https://openalex.org/W4384133558","https://openalex.org/W3025615835","https://openalex.org/W173210993","https://openalex.org/W2390660599","https://openalex.org/W3028847759","https://openalex.org/W2393688264","https://openalex.org/W3170174360"],"abstract_inverted_index":{"Recent":[0],"advancements":[1],"in":[2,41,154,194,199,205,212,223],"state-of-the-art":[3],"(SOTA)":[4],"offline":[5,94,151],"reinforcement":[6],"learning":[7,91],"(RL)":[8],"have":[9],"primarily":[10],"focused":[11],"on":[12,230],"addressing":[13],"function":[14],"approximation":[15],"errors,":[16],"which":[17,80],"contribute":[18],"to":[19,48,104,130],"the":[20,57,75,136,140,206,213,224,228],"overestimation":[21],"of":[22,182],"Q-values":[23],"for":[24,167],"out-of-distribution":[25,102],"actions":[26,100],"-":[27],"a":[28,121,163],"challenge":[29],"that":[30,124],"static":[31,157],"datasets":[32],"exacerbate.":[33],"However,":[34],"high-stakes":[35],"applications":[36],"such":[37,93],"as":[38,101],"recommendation":[39,207],"systems":[40],"online":[42,117],"gaming,":[43],"introduce":[44],"further":[45,72],"complexities":[46],"due":[47,103],"players'":[49,141,214],"psychology/":[50],"intent":[51],"driven":[52,208],"by":[53,74,96],"gameplay":[54],"experiences":[55],"and":[56,114,143,160,175,188,218],"platform's":[58],"inherent":[59],"volatility.":[60],"These":[61,178],"factors":[62],"create":[63],"highly":[64],"sparse,":[65],"partially":[66],"overlapping":[67],"state":[68,82,133,142],"spaces":[69,83],"across":[70,107],"policies,":[71],"influenced":[73],"experiment":[76],"path":[77],"selection":[78],"logic":[79],"biases":[81],"towards":[84],"specific":[85],"policies.":[86],"Current":[87],"SOTA":[88,186],"methods":[89],"constrain":[90],"from":[92],"data":[95,158],"clipping":[97],"known":[98],"counterfactual":[99,147,152],"poor":[105],"generalization":[106],"unobserved":[108],"states.":[109],"Further":[110],"aggravating":[111],"conservative":[112],"Q-learning":[113],"necessitating":[115],"more":[116],"exploration.":[118],"FAST-Q":[119,183],"introduces":[120],"novel":[122],"approach":[123],"(1)":[125],"leverages":[126],"Gradient":[127],"Reversal":[128],"Learning":[129],"construct":[131],"balanced":[132],"representations,":[134],"regularizing":[135],"policy-specific":[137],"bias":[138],"between":[139],"action":[144],"thereby":[145],"enabling":[146],"estimation;":[148],"(2)":[149],"supports":[150],"exploration":[153],"parallel":[155],"with":[156,227],"exploitation;":[159],"(3)":[161],"proposes":[162],"Q-value":[164],"decomposition":[165],"strategy":[166],"multi-objective":[168],"optimization,":[169],"facilitating":[170],"explainable":[171],"recommendations":[172],"over":[173,184],"short":[174],"long-term":[176],"objectives.":[177],"innovations":[179],"demonstrate":[180],"superiority":[181],"prior":[185],"approaches":[187],"demonstrates":[189],"at":[190],"least":[191],"0.15%":[192],"increase":[193],"player":[195],"returns,":[196],"2%":[197,210],"improvement":[198,211],"lifetime":[200],"value":[201],"(LTV),":[202],"0.4%":[203],"enhancement":[204],"engagement,":[209],"platform":[215],"dwell":[216],"time":[217],"an":[219],"impressive":[220],"10%":[221],"reduction":[222],"costs":[225],"associated":[226],"recommendation,":[229],"our":[231],"volatile":[232],"gaming":[233],"platform.":[234]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
