{"id":"https://openalex.org/W7124901087","doi":"https://doi.org/10.1109/tce.2026.3655431","title":"Reinforcement Learning-Based Data Weight Optimization for Sequential Recommendation","display_name":"Reinforcement Learning-Based Data Weight Optimization for Sequential Recommendation","publication_year":2026,"publication_date":"2026-01-19","ids":{"openalex":"https://openalex.org/W7124901087","doi":"https://doi.org/10.1109/tce.2026.3655431"},"language":null,"primary_location":{"id":"doi:10.1109/tce.2026.3655431","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tce.2026.3655431","pdf_url":null,"source":{"id":"https://openalex.org/S126824455","display_name":"IEEE Transactions on Consumer Electronics","issn_l":"0098-3063","issn":["0098-3063","1558-4127"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Consumer Electronics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5115592654","display_name":"Shiquan Wang","orcid":"https://orcid.org/0000-0002-6009-1405"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shiquan Wang","raw_affiliation_strings":["School of Computer Science, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041344406","display_name":"Yicheng Di","orcid":"https://orcid.org/0000-0003-3802-2080"},"institutions":[{"id":"https://openalex.org/I111599522","display_name":"Jiangnan University","ror":"https://ror.org/04mkzax54","country_code":"CN","type":"education","lineage":["https://openalex.org/I111599522"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yicheng Di","raw_affiliation_strings":["School of Artificial Intelligence and Computer Science, Jiangnan University, Wuxi, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence and Computer Science, Jiangnan University, Wuxi, China","institution_ids":["https://openalex.org/I111599522"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101539801","display_name":"Jiayu Bao","orcid":"https://orcid.org/0009-0004-6690-427X"},"institutions":[{"id":"https://openalex.org/I111599522","display_name":"Jiangnan University","ror":"https://ror.org/04mkzax54","country_code":"CN","type":"education","lineage":["https://openalex.org/I111599522"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiayu Bao","raw_affiliation_strings":["School of Artificial Intelligence and Computer Science, Jiangnan University, Wuxi, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence and Computer Science, Jiangnan University, Wuxi, China","institution_ids":["https://openalex.org/I111599522"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001957036","display_name":"Zhuolong Jiang","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhuolong Jiang","raw_affiliation_strings":["School of Computer Science, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123380290","display_name":"Hongjian Shi","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongjian Shi","raw_affiliation_strings":["School of Computer Science, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123399421","display_name":"Ruhui Ma","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ruhui Ma","raw_affiliation_strings":["School of Computer Science, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123434334","display_name":"Xin Gao","orcid":null},"institutions":[{"id":"https://openalex.org/I910623958","display_name":"Bombardier (Canada)","ror":"https://ror.org/03fbkqs26","country_code":"CA","type":"company","lineage":["https://openalex.org/I910623958"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Xin Gao","raw_affiliation_strings":["Bombardier NUG Signalling Solutions Company Ltd., Changzhou, China"],"affiliations":[{"raw_affiliation_string":"Bombardier NUG Signalling Solutions Company Ltd., Changzhou, China","institution_ids":["https://openalex.org/I910623958"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123375236","display_name":"Zhiwei Song","orcid":null},"institutions":[{"id":"https://openalex.org/I890505096","display_name":"Singer (United States)","ror":"https://ror.org/017qqw141","country_code":"US","type":"company","lineage":["https://openalex.org/I890505096"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhiwei Song","raw_affiliation_strings":["SingPilot Pte. Ltd., Ayer Rajah Crescent, Singapore"],"affiliations":[{"raw_affiliation_string":"SingPilot Pte. Ltd., Ayer Rajah Crescent, Singapore","institution_ids":["https://openalex.org/I890505096"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100725148","display_name":"Yuan Hong","orcid":"https://orcid.org/0000-0003-4095-4506"},"institutions":[{"id":"https://openalex.org/I4210153682","display_name":"Intelligent Health (United Kingdom)","ror":"https://ror.org/0576zak10","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210153682"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Hong Yuan","raw_affiliation_strings":["UGO-AI Intelligent Technology (Shanghai) Company Ltd., Shanghai, China"],"affiliations":[{"raw_affiliation_string":"UGO-AI Intelligent Technology (Shanghai) Company Ltd., Shanghai, China","institution_ids":["https://openalex.org/I4210153682"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123442332","display_name":"Yuan Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I111599522","display_name":"Jiangnan University","ror":"https://ror.org/04mkzax54","country_code":"CN","type":"education","lineage":["https://openalex.org/I111599522"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuan Liu","raw_affiliation_strings":["School of Artificial Intelligence and Computer Science, Jiangnan University, Wuxi, China"],"affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence and Computer Science, Jiangnan University, Wuxi, China","institution_ids":["https://openalex.org/I111599522"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5085406721","display_name":"Haibing Guan","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haibing Guan","raw_affiliation_strings":["School of Computer Science, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5115592654"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.1120467,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"72","issue":"1","first_page":"2331","last_page":"2346"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.8986999988555908,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.8986999988555908,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.014800000004470348,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.009700000286102295,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7113000154495239},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.5982000231742859},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.41839998960494995},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.39469999074935913},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.3840000033378601},{"id":"https://openalex.org/keywords/recommender-system","display_name":"Recommender system","score":0.37139999866485596},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.3682999908924103},{"id":"https://openalex.org/keywords/data-quality","display_name":"Data quality","score":0.3384000062942505}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8123999834060669},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7113000154495239},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.5982000231742859},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5920000076293945},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5813000202178955},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4194999933242798},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.41839998960494995},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.39469999074935913},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.3840000033378601},{"id":"https://openalex.org/C557471498","wikidata":"https://www.wikidata.org/wiki/Q554950","display_name":"Recommender system","level":2,"score":0.37139999866485596},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.3682999908924103},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.3384000062942505},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.33079999685287476},{"id":"https://openalex.org/C93959086","wikidata":"https://www.wikidata.org/wiki/Q6888345","display_name":"Model selection","level":2,"score":0.3158000111579895},{"id":"https://openalex.org/C2779280203","wikidata":"https://www.wikidata.org/wiki/Q17121211","display_name":"Small data","level":2,"score":0.2815999984741211},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.26919999718666077},{"id":"https://openalex.org/C159694833","wikidata":"https://www.wikidata.org/wiki/Q2321565","display_name":"Iterative method","level":2,"score":0.2651999890804291},{"id":"https://openalex.org/C206688291","wikidata":"https://www.wikidata.org/wiki/Q7617819","display_name":"Stochastic gradient descent","level":3,"score":0.26190000772476196},{"id":"https://openalex.org/C2781170535","wikidata":"https://www.wikidata.org/wiki/Q30587856","display_name":"Noisy data","level":2,"score":0.2612999975681305},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.257999986410141},{"id":"https://openalex.org/C2781002164","wikidata":"https://www.wikidata.org/wiki/Q6822311","display_name":"Meta learning (computer science)","level":3,"score":0.25679999589920044},{"id":"https://openalex.org/C2989514635","wikidata":"https://www.wikidata.org/wiki/Q5164377","display_name":"Constrained optimization problem","level":3,"score":0.25110000371932983}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tce.2026.3655431","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tce.2026.3655431","pdf_url":null,"source":{"id":"https://openalex.org/S126824455","display_name":"IEEE Transactions on Consumer Electronics","issn_l":"0098-3063","issn":["0098-3063","1558-4127"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Consumer Electronics","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W2171279286","https://openalex.org/W2783272285","https://openalex.org/W2907091645","https://openalex.org/W2963367478","https://openalex.org/W2963696295","https://openalex.org/W2984100107","https://openalex.org/W3008684691","https://openalex.org/W3033429809","https://openalex.org/W3133849783","https://openalex.org/W3178835722","https://openalex.org/W4224316819","https://openalex.org/W4296591867","https://openalex.org/W4312412605","https://openalex.org/W4313156773","https://openalex.org/W4362647036","https://openalex.org/W4386212337","https://openalex.org/W4386728933","https://openalex.org/W4387848745","https://openalex.org/W4387917717","https://openalex.org/W4390412407","https://openalex.org/W4392367398","https://openalex.org/W4396758712","https://openalex.org/W4400531852","https://openalex.org/W4401042304","https://openalex.org/W4401090676","https://openalex.org/W4401544324","https://openalex.org/W4406089761","https://openalex.org/W4407108493","https://openalex.org/W4407375814","https://openalex.org/W4407403520","https://openalex.org/W4408353595","https://openalex.org/W4411549477","https://openalex.org/W7133239020"],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advances":[1],"in":[2,13],"recommendation":[3,38,120],"systems":[4],"have":[5],"highlighted":[6],"the":[7,62,153],"critical":[8],"importance":[9,51,95],"of":[10,152],"data":[11,25,33,75,82,115],"quality":[12,34],"model":[14,43],"performance.":[15],"In":[16],"this":[17],"paper,":[18],"we":[19],"propose":[20],"a":[21,57],"reinforcement":[22],"learning":[23],"based":[24],"weight":[26],"optimization":[27,98],"framework,":[28,67],"termed":[29],"RLWORec,":[30],"to":[31,53,132,145],"enhance":[32],"for":[35],"both":[36],"small":[37,130],"models":[39,131,144],"and":[40,72],"large":[41,143],"language":[42],"(LLM)":[44],"fine-tuning":[45],"scenarios.":[46],"By":[47],"dynamically":[48],"assigning":[49],"continuous":[50],"weights":[52],"training":[54,125,155],"samples":[55],"via":[56],"policy":[58],"gradient":[59],"method":[60,128],"under":[61],"Proximal":[63],"Policy":[64],"Optimization":[65],"(PPO)":[66],"our":[68],"approach":[69],"effectively":[70],"identifies":[71],"filters":[73],"noisy":[74],"while":[76,141],"preserving":[77],"informative":[78],"samples.":[79],"Unlike":[80],"traditional":[81],"selection":[83,116],"methods":[84],"that":[85,110],"rely":[86],"on":[87,105],"static":[88],"scoring":[89],"mechanisms,":[90],"RLWORec":[91,111],"adaptively":[92],"learns":[93],"sample":[94],"through":[96],"iterative":[97],"with":[99,122,149],"global":[100],"performance":[101,121,135],"feedback.":[102],"Extensive":[103],"experiments":[104],"three":[106],"real-world":[107],"datasets":[108],"demonstrate":[109],"consistently":[112],"outperforms":[113],"state-of-the-art":[114],"baselines,":[117],"achieving":[118],"superior":[119],"significantly":[123],"reduced":[124],"data.":[126,156],"Our":[127],"enables":[129],"exceed":[133],"full-dataset":[134],"using":[136],"only":[137],"carefully":[138],"selected":[139],"subsets,":[140],"allowing":[142],"achieve":[146],"comparable":[147],"results":[148],"merely":[150],"2%":[151],"original":[154]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2026-01-21T00:00:00"}
