{"id":"https://openalex.org/W2963841569","doi":"https://doi.org/10.1145/3219819.3219918","title":"Deep Reinforcement Learning for Sponsored Search Real-time Bidding","display_name":"Deep Reinforcement Learning for Sponsored Search Real-time Bidding","publication_year":2018,"publication_date":"2018-07-19","ids":{"openalex":"https://openalex.org/W2963841569","doi":"https://doi.org/10.1145/3219819.3219918","mag":"2963841569"},"language":"en","primary_location":{"id":"doi:10.1145/3219819.3219918","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3219819.3219918","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery &amp; Data Mining","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100358009","display_name":"Jun Zhao","orcid":"https://orcid.org/0000-0002-3573-152X"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jun Zhao","raw_affiliation_strings":["Alibaba Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Alibaba Group, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057484116","display_name":"Guang Qiu","orcid":"https://orcid.org/0000-0002-4669-6328"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guang Qiu","raw_affiliation_strings":["Alibaba Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Alibaba Group, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065194313","display_name":"Ziyu Guan","orcid":"https://orcid.org/0000-0003-2413-4698"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ziyu Guan","raw_affiliation_strings":["Xidian University, Xian, China"],"affiliations":[{"raw_affiliation_string":"Xidian University, Xian, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066877556","display_name":"Wei Zhao","orcid":"https://orcid.org/0000-0002-0622-9258"},"institutions":[{"id":"https://openalex.org/I149594827","display_name":"Xidian University","ror":"https://ror.org/05s92vm98","country_code":"CN","type":"education","lineage":["https://openalex.org/I149594827"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Zhao","raw_affiliation_strings":["Xidian University, Xian, China"],"affiliations":[{"raw_affiliation_string":"Xidian University, Xian, China","institution_ids":["https://openalex.org/I149594827"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102013345","display_name":"Xiaofei He","orcid":"https://orcid.org/0009-0001-9107-2354"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaofei He","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100358009"],"corresponding_institution_ids":["https://openalex.org/I45928872"],"apc_list":null,"apc_paid":null,"fwci":9.1944,"has_fulltext":false,"cited_by_count":80,"citation_normalized_percentile":{"value":0.98075031,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1021","last_page":"1030"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11182","display_name":"Auction Theory and Applications","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11182","display_name":"Auction Theory and Applications","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11161","display_name":"Consumer Market Behavior and Pricing","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1406","display_name":"Marketing"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10328","display_name":"Supply Chain and Inventory Management","score":0.9907000064849854,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bidding","display_name":"Bidding","score":0.942145824432373},{"id":"https://openalex.org/keywords/real-time-bidding","display_name":"Real-time bidding","score":0.7761409282684326},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7740482687950134},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7536401748657227},{"id":"https://openalex.org/keywords/display-advertising","display_name":"Display advertising","score":0.5662263035774231},{"id":"https://openalex.org/keywords/randomness","display_name":"Randomness","score":0.5511434078216553},{"id":"https://openalex.org/keywords/online-advertising","display_name":"Online advertising","score":0.3897871971130371},{"id":"https://openalex.org/keywords/operations-research","display_name":"Operations research","score":0.33601075410842896},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2995176315307617},{"id":"https://openalex.org/keywords/microeconomics","display_name":"Microeconomics","score":0.14309900999069214},{"id":"https://openalex.org/keywords/economics","display_name":"Economics","score":0.10580053925514221},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1042773425579071},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.09279194474220276}],"concepts":[{"id":"https://openalex.org/C9233905","wikidata":"https://www.wikidata.org/wiki/Q3276328","display_name":"Bidding","level":2,"score":0.942145824432373},{"id":"https://openalex.org/C1525070","wikidata":"https://www.wikidata.org/wiki/Q2134714","display_name":"Real-time bidding","level":3,"score":0.7761409282684326},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7740482687950134},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7536401748657227},{"id":"https://openalex.org/C2777999536","wikidata":"https://www.wikidata.org/wiki/Q2399498","display_name":"Display advertising","level":4,"score":0.5662263035774231},{"id":"https://openalex.org/C125112378","wikidata":"https://www.wikidata.org/wiki/Q176640","display_name":"Randomness","level":2,"score":0.5511434078216553},{"id":"https://openalex.org/C512338625","wikidata":"https://www.wikidata.org/wiki/Q624902","display_name":"Online advertising","level":3,"score":0.3897871971130371},{"id":"https://openalex.org/C42475967","wikidata":"https://www.wikidata.org/wiki/Q194292","display_name":"Operations research","level":1,"score":0.33601075410842896},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2995176315307617},{"id":"https://openalex.org/C175444787","wikidata":"https://www.wikidata.org/wiki/Q39072","display_name":"Microeconomics","level":1,"score":0.14309900999069214},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.10580053925514221},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1042773425579071},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.09279194474220276},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3219819.3219918","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3219819.3219918","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery &amp; Data Mining","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4300000071525574,"id":"https://metadata.un.org/sdg/17","display_name":"Partnerships for the goals"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1498502445","https://openalex.org/W1515851193","https://openalex.org/W1757796397","https://openalex.org/W1763335505","https://openalex.org/W1825869920","https://openalex.org/W1973976093","https://openalex.org/W2011384577","https://openalex.org/W2012905273","https://openalex.org/W2021375049","https://openalex.org/W2033798573","https://openalex.org/W2039842578","https://openalex.org/W2073685064","https://openalex.org/W2092560364","https://openalex.org/W2099618002","https://openalex.org/W2117340018","https://openalex.org/W2117798265","https://openalex.org/W2119914577","https://openalex.org/W2121103318","https://openalex.org/W2121863487","https://openalex.org/W2135724610","https://openalex.org/W2141230344","https://openalex.org/W2141457763","https://openalex.org/W2145339207","https://openalex.org/W2149822245","https://openalex.org/W2290354866","https://openalex.org/W2562337727","https://openalex.org/W2963658727","https://openalex.org/W3105140685","https://openalex.org/W4233776596"],"related_works":["https://openalex.org/W4306254152","https://openalex.org/W2149822245","https://openalex.org/W2037047546","https://openalex.org/W2740620324","https://openalex.org/W2990183840","https://openalex.org/W2770888277","https://openalex.org/W2766666295","https://openalex.org/W2039842578","https://openalex.org/W3005478775","https://openalex.org/W3099947523"],"abstract_inverted_index":{"Bidding":[0],"optimization":[1],"is":[2],"one":[3],"of":[4,20,48,95,153,174,198,227,231,241],"the":[5,18,34,54,63,115,134,138,148,175,209,213,218,222,239],"most":[6],"critical":[7],"problems":[8],"in":[9,66,221],"online":[10,127,235],"advertising.":[11],"Sponsored":[12],"search":[13,68,224],"(SS)":[14],"auction,":[15,44,69],"due":[16,80],"to":[17,52,81,132,211],"randomness":[19],"user":[21,83],"query":[22,84],"behavior":[23,85],"and":[24,86,178,201,234],"platform":[25,226],"nature,":[26],"usually":[27],"adopts":[28],"keyword-level":[29],"bidding":[30,50,89,194,204],"strategies.":[31],"In":[32,58],"contrast,":[33],"display":[35],"advertising":[36],"(DA),":[37],"as":[38],"a":[39,74,108,161,167,180,193],"relatively":[40],"simpler":[41],"scenario":[42],"for":[43,56,101,113,126,183,196],"has":[45,73],"taken":[46],"advantage":[47],"real-time":[49,203],"(RTB)":[51],"boost":[53],"performance":[55],"advertisers.":[57],"this":[59],"paper,":[60],"we":[61,165,191],"consider":[62],"RTB":[64],"problem":[65],"sponsored":[67],"named":[70],"SS-RTB.":[71,184],"SS-RTB":[72,219],"much":[75],"more":[76,87],"complex":[77,88,116],"dynamic":[78,117],"environment,":[79],"stochastic":[82],"policies":[90],"based":[91],"on":[92],"multiple":[93],"keywords":[94],"an":[96],"ad.":[97],"Most":[98],"previous":[99],"methods":[100,122],"DA":[102],"cannot":[103],"be":[104],"applied.":[105],"We":[106,206,216],"propose":[107,179],"reinforcement":[109],"learning":[110],"(RL)":[111],"solution":[112],"handling":[114],"environment.":[118],"Although":[119],"some":[120],"RL":[121],"have":[123],"been":[124],"proposed":[125],"advertising,":[128],"they":[129],"all":[130],"fail":[131],"address":[133],"\"environment":[135],"changing''":[136],"problem:":[137],"state":[139],"transition":[140,158],"probabilities":[141],"vary":[142],"between":[143],"two":[144,154],"days.":[145],"Motivated":[146],"by":[147],"observation":[149],"that":[150],"auction":[151,176,225],"sequences":[152],"days":[155],"share":[156],"similar":[157],"patterns":[159],"at":[160,171],"proper":[162],"aggregation":[163],"level,":[164],"formulate":[166],"robust":[168],"MDP":[169],"model":[170,195],"hour-aggregation":[172],"level":[173],"data":[177],"control-by-model":[181],"framework":[182],"Rather":[185],"than":[186],"generating":[187],"bid":[188],"prices":[189],"directly,":[190],"decide":[192],"impressions":[197],"each":[199],"hour":[200],"perform":[202],"accordingly.":[205],"also":[207],"extend":[208],"method":[210],"handle":[212],"multi-agent":[214],"problem.":[215],"deployed":[217],"system":[220],"e-commerce":[223],"Alibaba.":[228],"Empirical":[229],"experiments":[230],"offline":[232],"evaluation":[233],"A/B":[236],"test":[237],"demonstrate":[238],"effectiveness":[240],"our":[242],"method.":[243]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":11},{"year":2022,"cited_by_count":10},{"year":2021,"cited_by_count":14},{"year":2020,"cited_by_count":15},{"year":2019,"cited_by_count":10},{"year":2018,"cited_by_count":3}],"updated_date":"2026-03-04T09:10:02.777135","created_date":"2025-10-10T00:00:00"}
