{"id":"https://openalex.org/W4398150962","doi":"https://doi.org/10.1109/tkde.2024.3402649","title":"CIPPO: Contrastive Imitation Proximal Policy Optimization for Recommendation Based on Reinforcement Learning","display_name":"CIPPO: Contrastive Imitation Proximal Policy Optimization for Recommendation Based on Reinforcement Learning","publication_year":2024,"publication_date":"2024-05-20","ids":{"openalex":"https://openalex.org/W4398150962","doi":"https://doi.org/10.1109/tkde.2024.3402649"},"language":"en","primary_location":{"id":"doi:10.1109/tkde.2024.3402649","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2024.3402649","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5049681029","display_name":"Weilong Chen","orcid":"https://orcid.org/0000-0003-2202-601X"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Weilong Chen","raw_affiliation_strings":["School of Computer Science and Engineering, University of Electronic Science and Technology of China (UESTC), Chengdu, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, University of Electronic Science and Technology of China (UESTC), Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114949466","display_name":"Shao\u2010Liang Zhang","orcid":"https://orcid.org/0000-0001-9214-261X"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shaoliang Zhang","raw_affiliation_strings":["WeChat Search Application Department, Tencent, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"WeChat Search Application Department, Tencent, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101577090","display_name":"Ruobing Xie","orcid":"https://orcid.org/0000-0003-3170-5647"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ruobing Xie","raw_affiliation_strings":["WeChat Search Application Department, Tencent, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"WeChat Search Application Department, Tencent, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102973664","display_name":"Feng Xia","orcid":"https://orcid.org/0000-0001-5279-9908"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Feng Xia","raw_affiliation_strings":["WeChat Search Application Department, Tencent, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"WeChat Search Application Department, Tencent, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023086553","display_name":"Leyu Lin","orcid":"https://orcid.org/0000-0001-5471-500X"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Leyu Lin","raw_affiliation_strings":["WeChat Search Application Department, Tencent, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"WeChat Search Application Department, Tencent, Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100775080","display_name":"Xinran Zhang","orcid":"https://orcid.org/0000-0002-9250-8711"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinran Zhang","raw_affiliation_strings":["School of Computer Science and Engineering, University of Electronic Science and Technology of China (UESTC), Chengdu, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, University of Electronic Science and Technology of China (UESTC), Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100322613","display_name":"Yan Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yan Wang","raw_affiliation_strings":["School of Computer Science and Engineering, University of Electronic Science and Technology of China (UESTC), Chengdu, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, University of Electronic Science and Technology of China (UESTC), Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100632845","display_name":"Yanru Zhang","orcid":"https://orcid.org/0000-0003-4182-2150"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]},{"id":"https://openalex.org/I4210145761","display_name":"Shenzhen Institutes of Advanced Technology","ror":"https://ror.org/04gh4er46","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210145761"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanru Zhang","raw_affiliation_strings":["School of Computer Science and Engineering, University of Electronic Science and Technology of China (UESTC), Chengdu, China","Shenzhen Institute for Advanced Study of UESTC, Shenzhen, China","School of Computer Science and Engineering, UESTC, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, University of Electronic Science and Technology of China (UESTC), Chengdu, China","institution_ids":["https://openalex.org/I150229711"]},{"raw_affiliation_string":"Shenzhen Institute for Advanced Study of UESTC, Shenzhen, China","institution_ids":["https://openalex.org/I4210145761"]},{"raw_affiliation_string":"School of Computer Science and Engineering, UESTC, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5049681029"],"corresponding_institution_ids":["https://openalex.org/I150229711"],"apc_list":null,"apc_paid":null,"fwci":2.3383,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.90062586,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":"36","issue":"11","first_page":"5753","last_page":"5767"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.9196000099182129,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.9196000099182129,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.83409583568573},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7212175726890564},{"id":"https://openalex.org/keywords/imitation","display_name":"Imitation","score":0.6043508052825928},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.47345229983329773},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3353436589241028}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.83409583568573},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7212175726890564},{"id":"https://openalex.org/C126388530","wikidata":"https://www.wikidata.org/wiki/Q1131737","display_name":"Imitation","level":2,"score":0.6043508052825928},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47345229983329773},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3353436589241028},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tkde.2024.3402649","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2024.3402649","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3405505986","display_name":null,"funder_award_id":"2023-CY02-00003-GX","funder_id":"https://openalex.org/F4320326670","funder_display_name":"Chengdu Science and Technology Bureau"}],"funders":[{"id":"https://openalex.org/F4320326670","display_name":"Chengdu Science and Technology Bureau","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":56,"referenced_works":["https://openalex.org/W1191599655","https://openalex.org/W1515851193","https://openalex.org/W2004147962","https://openalex.org/W2108862644","https://openalex.org/W2119717200","https://openalex.org/W2145339207","https://openalex.org/W2295739661","https://openalex.org/W2340526403","https://openalex.org/W2471222571","https://openalex.org/W2475334473","https://openalex.org/W2604662567","https://openalex.org/W2736601468","https://openalex.org/W2741274947","https://openalex.org/W2746553466","https://openalex.org/W2787933113","https://openalex.org/W2788295351","https://openalex.org/W2788862220","https://openalex.org/W2799544270","https://openalex.org/W2804930149","https://openalex.org/W2805805280","https://openalex.org/W2898085636","https://openalex.org/W2902572901","https://openalex.org/W2948345531","https://openalex.org/W2953981431","https://openalex.org/W2963608065","https://openalex.org/W2963654596","https://openalex.org/W2973229164","https://openalex.org/W2990460121","https://openalex.org/W2991355586","https://openalex.org/W2997130580","https://openalex.org/W3009561768","https://openalex.org/W3012881846","https://openalex.org/W3034853385","https://openalex.org/W3049342604","https://openalex.org/W3092490845","https://openalex.org/W3101704389","https://openalex.org/W3102778384","https://openalex.org/W3102899483","https://openalex.org/W3116249021","https://openalex.org/W3127756416","https://openalex.org/W3157410348","https://openalex.org/W3165964024","https://openalex.org/W3173331009","https://openalex.org/W3176915718","https://openalex.org/W3201286590","https://openalex.org/W3208338073","https://openalex.org/W3209481761","https://openalex.org/W3211540990","https://openalex.org/W4221164312","https://openalex.org/W4283796596","https://openalex.org/W4288110508","https://openalex.org/W4289709987","https://openalex.org/W4290827187","https://openalex.org/W4296604485","https://openalex.org/W4313156423","https://openalex.org/W4384891727"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W3046775127","https://openalex.org/W3107602296","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W4364306694","https://openalex.org/W4312192474","https://openalex.org/W4283697347"],"abstract_inverted_index":{"Recommendation":[0],"systems,":[1],"widely":[2],"adopted":[3],"in":[4,131,185],"social":[5],"networks,":[6],"personalize":[7],"user":[8],"experiences":[9],"through":[10],"advanced":[11],"technologies":[12],"such":[13],"as":[14],"Reinforcement":[15],"Learning":[16],"(RL),":[17],"known":[18],"for":[19,94],"producing":[20],"high-performance,":[21],"list-":[22],"wise":[23],"recommendations.":[24],"However,":[25],"RL-based":[26],"recommendation":[27,95],"methods":[28],"exhibit":[29],"biases,":[30],"specifically:":[31],"1)":[32],"Online":[33],"bias,":[34,58],"which":[35],"stems":[36],"from":[37,63],"a":[38,53,59,84,108,137,189],"complex":[39],"real-world":[40],"<italic":[41,67,73],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[42,68,74],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">online":[43],"policy</i>":[44,70,76],"composed":[45],"of":[46,163],"various":[47],"rules":[48],"and":[49,71,106,124,150,156,170,177,184],"models":[50],"rather":[51],"than":[52],"single":[54],"policy;":[55],"2)":[56],"Training":[57],"distributional":[60,145],"shift":[61,146],"resulting":[62],"differences":[64],"between":[65,122,147],"the":[66,72,116,128,144,148,161,175],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">target":[69],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">behavior":[75],".":[77],"To":[78],"address":[79],"these":[80],"issues,":[81],"we":[82],"introduce":[83],"novel":[85],"framework":[86],"named":[87],"Contrastive":[88],"Imitation":[89,110],"Proximal":[90,132],"Policy":[91,133],"Optimization":[92],"(CIPPO)":[93],"based":[96],"on":[97,182],"RL.":[98],"This":[99],"approach":[100],"leverages":[101],"extensively":[102],"labeled":[103],"feedback":[104],"data":[105],"incorporates":[107],"Masked":[109],"Network":[111],"(MIN)":[112],"that":[113],"closely":[114],"emulates":[115],"online":[117,123,157],"policy,":[118],"thus":[119],"reducing":[120],"discrepancies":[121],"offline":[125,155],"environments.":[126],"Additionally,":[127],"clipping":[129],"function":[130],"Optimization,":[134],"combined":[135],"with":[136,193],"specially":[138],"designed":[139],"contrastive":[140],"module,":[141],"effectively":[142],"reduces":[143],"behavior":[149],"target":[151],"policies.":[152],"We":[153],"conduct":[154],"experiments":[158],"to":[159,173],"show":[160],"improvements":[162],"CIPPO,":[164],"providing":[165],"details":[166],"including":[167],"ablation":[168],"tests":[169],"parameter":[171],"analysis":[172],"validate":[174],"effectiveness":[176],"robustness.":[178],"CIPPO":[179],"gains":[180],"12.79%":[181],"ACN":[183],"WeChat":[186],"Top":[187],"Stories,":[188],"large":[190],"media":[191],"platform":[192],"over":[194],"50":[195],"million":[196],"users.":[197]},"counts_by_year":[{"year":2025,"cited_by_count":3}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
