{"id":"https://openalex.org/W4409657161","doi":"https://doi.org/10.1145/3696410.3714562","title":"Policy-Guided Causal State Representation for Offline Reinforcement Learning Recommendation","display_name":"Policy-Guided Causal State Representation for Offline Reinforcement Learning Recommendation","publication_year":2025,"publication_date":"2025-04-22","ids":{"openalex":"https://openalex.org/W4409657161","doi":"https://doi.org/10.1145/3696410.3714562"},"language":"en","primary_location":{"id":"doi:10.1145/3696410.3714562","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3696410.3714562","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3696410.3714562","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Web Conference 2025","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3696410.3714562","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100358930","display_name":"Siyu Wang","orcid":"https://orcid.org/0009-0008-8726-5277"},"institutions":[{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Siyu Wang","raw_affiliation_strings":["The University of New South Wales, Sydney, Australia"],"affiliations":[{"raw_affiliation_string":"The University of New South Wales, Sydney, Australia","institution_ids":["https://openalex.org/I31746571"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057487706","display_name":"Xiaocong Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I1292875679","display_name":"Commonwealth Scientific and Industrial Research Organisation","ror":"https://ror.org/03qn8fb07","country_code":"AU","type":"government","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I4387156119"]},{"id":"https://openalex.org/I42894916","display_name":"Data61","ror":"https://ror.org/03q397159","country_code":"AU","type":"other","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I42894916","https://openalex.org/I4387156119"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Xiaocong Chen","raw_affiliation_strings":["Data 61, CSIRO, Eveleigh, Australia"],"affiliations":[{"raw_affiliation_string":"Data 61, CSIRO, Eveleigh, Australia","institution_ids":["https://openalex.org/I42894916","https://openalex.org/I1292875679"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5052731721","display_name":"Lina Yao","orcid":"https://orcid.org/0000-0002-4149-839X"},"institutions":[{"id":"https://openalex.org/I1292875679","display_name":"Commonwealth Scientific and Industrial Research Organisation","ror":"https://ror.org/03qn8fb07","country_code":"AU","type":"government","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I4387156119"]},{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]},{"id":"https://openalex.org/I42894916","display_name":"Data61","ror":"https://ror.org/03q397159","country_code":"AU","type":"other","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I42894916","https://openalex.org/I4387156119"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Lina Yao","raw_affiliation_strings":["Data 61, CSIRO, Eveleigh, Australia and The University of New South Wales, Sydney, Australia"],"affiliations":[{"raw_affiliation_string":"Data 61, CSIRO, Eveleigh, Australia and The University of New South Wales, Sydney, Australia","institution_ids":["https://openalex.org/I42894916","https://openalex.org/I31746571","https://openalex.org/I1292875679"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100358930"],"corresponding_institution_ids":["https://openalex.org/I31746571"],"apc_list":null,"apc_paid":null,"fwci":7.155,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.96343359,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"402","last_page":"412"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10603","display_name":"Smart Grid Energy Management","score":0.9829999804496765,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8802422285079956},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7567227482795715},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5441521406173706},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5437648296356201},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.5006749629974365},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.47986501455307007},{"id":"https://openalex.org/keywords/recommender-system","display_name":"Recommender system","score":0.46266674995422363},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.08356747031211853}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8802422285079956},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7567227482795715},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5441521406173706},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5437648296356201},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.5006749629974365},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.47986501455307007},{"id":"https://openalex.org/C557471498","wikidata":"https://www.wikidata.org/wiki/Q554950","display_name":"Recommender system","level":2,"score":0.46266674995422363},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.08356747031211853},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3696410.3714562","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3696410.3714562","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3696410.3714562","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Web Conference 2025","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3696410.3714562","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3696410.3714562","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3696410.3714562","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Web Conference 2025","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4409657161.pdf","grobid_xml":"https://content.openalex.org/works/W4409657161.grobid-xml"},"referenced_works_count":22,"referenced_works":["https://openalex.org/W1995688924","https://openalex.org/W2137063737","https://openalex.org/W2787666871","https://openalex.org/W2885305518","https://openalex.org/W2963654596","https://openalex.org/W2990138404","https://openalex.org/W3123348991","https://openalex.org/W3156939347","https://openalex.org/W3208231292","https://openalex.org/W4290876156","https://openalex.org/W4292419518","https://openalex.org/W4292423901","https://openalex.org/W4293569092","https://openalex.org/W4318159260","https://openalex.org/W4383993482","https://openalex.org/W4384408011","https://openalex.org/W4384891727","https://openalex.org/W4385562472","https://openalex.org/W4388740239","https://openalex.org/W4396216288","https://openalex.org/W4400528915","https://openalex.org/W4401863542"],"related_works":["https://openalex.org/W4390273403","https://openalex.org/W4386781444","https://openalex.org/W2150182025","https://openalex.org/W3092950680","https://openalex.org/W3197542405","https://openalex.org/W2056712470","https://openalex.org/W3125580266","https://openalex.org/W4288390103","https://openalex.org/W4317039510","https://openalex.org/W4238861846"],"abstract_inverted_index":{"In":[0,86,150],"offline":[1,45,84,226],"reinforcement":[2],"learning-based":[3],"recommender":[4,228],"systems":[5],"(RLRS),":[6],"learning":[7,82],"effective":[8],"state":[9,24,80,134,161,209],"representations":[10,25,162,174,184],"is":[11,117],"crucial":[12],"for":[13,75,211,225],"capturing":[14],"user":[15,59,148],"preferences":[16],"that":[17,33,54,97,145,182,216],"directly":[18,146],"impact":[19],"long-term":[20],"rewards.":[21],"However,":[22],"raw":[23],"often":[26],"contain":[27],"high-dimensional,":[28],"noisy":[29],"information":[30],"and":[31,79,103,139,178],"components":[32,109,135,210],"are":[34,55],"not":[35],"causally":[36,107],"relevant":[37,57,108],"to":[38,50,58,158,206],"the":[39,87,106,125,130,137,141,151,165,172,176,183,194,202],"reward.":[40],"Additionally,":[41],"missing":[42],"transitions":[43],"in":[44,83],"data":[46],"make":[47],"it":[48],"challenging":[49],"accurately":[51],"identify":[52],"features":[53],"most":[56],"satisfaction.":[60],"To":[61],"address":[62],"these":[63],"challenges,":[64],"we":[65,90,154],"propose":[66],"Policy-Guided":[67],"Causal":[68],"Representation":[69],"(PGCR),":[70],"a":[71,92,120,190],"novel":[72],"two-stage":[73],"framework":[74],"causal":[76,93,131,197],"feature":[77,94],"selection":[78,95],"representation":[81],"RLRS.":[85],"first":[88],"stage,":[89,153],"learn":[91,159],"policy":[96,116],"generates":[98],"modified":[99,179],"states":[100],"by":[101,119,163],"isolating":[102],"retaining":[104],"only":[105],"(CRCs)":[110],"while":[111],"altering":[112],"irrelevant":[113],"components.":[114],"This":[115],"guided":[118],"reward":[121,138],"function":[122],"based":[123],"on":[124,136,186],"Wasserstein":[126],"distance,":[127],"which":[128],"measures":[129],"effect":[132],"of":[133,143,175,196,204],"encourages":[140],"preservation":[142],"CRCs":[144],"influence":[147],"interests.":[149],"second":[152],"train":[155],"an":[156],"encoder":[157],"compact":[160],"minimizing":[164],"mean":[166],"squared":[167],"error":[168],"(MSE)":[169],"loss":[170],"between":[171],"latent":[173],"original":[177],"states,":[180],"ensuring":[181],"focus":[185],"CRCs.":[187],"We":[188],"provide":[189],"theoretical":[191],"analysis":[192],"proving":[193],"identifiability":[195],"effects":[198],"from":[199],"interventions,":[200],"validating":[201],"ability":[203],"PGCR":[205,217],"isolate":[207],"critical":[208],"decision-making.":[212],"Extensive":[213],"experiments":[214],"demonstrate":[215],"significantly":[218],"improves":[219],"recommendation":[220],"performance,":[221],"confirming":[222],"its":[223],"effectiveness":[224],"RL-based":[227],"systems.":[229]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2}],"updated_date":"2026-04-07T14:57:38.498316","created_date":"2025-10-10T00:00:00"}
