{"id":"https://openalex.org/W2562337727","doi":"https://doi.org/10.1145/3018661.3018702","title":"Real-Time Bidding by Reinforcement Learning in Display Advertising","display_name":"Real-Time Bidding by Reinforcement Learning in Display Advertising","publication_year":2017,"publication_date":"2017-02-02","ids":{"openalex":"https://openalex.org/W2562337727","doi":"https://doi.org/10.1145/3018661.3018702","mag":"2562337727"},"language":"en","primary_location":{"id":"doi:10.1145/3018661.3018702","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3018661.3018702","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Tenth ACM International Conference on Web Search and Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1701.02490","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Han Cai","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Han Cai","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Kan Ren","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kan Ren","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Weinan Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weinan Zhang","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Kleanthis Malialis","orcid":null},"institutions":[{"id":"https://openalex.org/I45129253","display_name":"University College London","ror":"https://ror.org/02jx3x895","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I45129253"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Kleanthis Malialis","raw_affiliation_strings":["University College London, London, United Kingdom"],"affiliations":[{"raw_affiliation_string":"University College London, London, United Kingdom","institution_ids":["https://openalex.org/I45129253"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jun Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I45129253","display_name":"University College London","ror":"https://ror.org/02jx3x895","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I45129253"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jun Wang","raw_affiliation_strings":["University College London, London, United Kingdom"],"affiliations":[{"raw_affiliation_string":"University College London, London, United Kingdom","institution_ids":["https://openalex.org/I45129253"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yong Yu","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yong Yu","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":null,"display_name":"Defeng Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Defeng Guo","raw_affiliation_strings":["Vlion Inc., Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Vlion Inc., Shanghai, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":18.1569,"has_fulltext":false,"cited_by_count":162,"citation_normalized_percentile":{"value":0.99055317,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"661","last_page":"670"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11161","display_name":"Consumer Market Behavior and Pricing","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1406","display_name":"Marketing"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11161","display_name":"Consumer Market Behavior and Pricing","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1406","display_name":"Marketing"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11182","display_name":"Auction Theory and Applications","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9932000041007996,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bidding","display_name":"Bidding","score":0.9334999918937683},{"id":"https://openalex.org/keywords/real-time-bidding","display_name":"Real-time bidding","score":0.8515999913215637},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7218999862670898},{"id":"https://openalex.org/keywords/display-advertising","display_name":"Display advertising","score":0.6335999965667725},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.6317999958992004},{"id":"https://openalex.org/keywords/unique-bid-auction","display_name":"Unique bid auction","score":0.48669999837875366},{"id":"https://openalex.org/keywords/ebidding","display_name":"Ebidding","score":0.4756999909877777},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.44440001249313354},{"id":"https://openalex.org/keywords/bid-price","display_name":"Bid price","score":0.42080000042915344},{"id":"https://openalex.org/keywords/common-value-auction","display_name":"Common value auction","score":0.41929998993873596}],"concepts":[{"id":"https://openalex.org/C9233905","wikidata":"https://www.wikidata.org/wiki/Q3276328","display_name":"Bidding","level":2,"score":0.9334999918937683},{"id":"https://openalex.org/C1525070","wikidata":"https://www.wikidata.org/wiki/Q2134714","display_name":"Real-time bidding","level":3,"score":0.8515999913215637},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7218999862670898},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6498000025749207},{"id":"https://openalex.org/C2777999536","wikidata":"https://www.wikidata.org/wiki/Q2399498","display_name":"Display advertising","level":4,"score":0.6335999965667725},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.6317999958992004},{"id":"https://openalex.org/C205871297","wikidata":"https://www.wikidata.org/wiki/Q765312","display_name":"Unique bid auction","level":4,"score":0.48669999837875366},{"id":"https://openalex.org/C143989560","wikidata":"https://www.wikidata.org/wiki/Q17011617","display_name":"Ebidding","level":3,"score":0.4756999909877777},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.44440001249313354},{"id":"https://openalex.org/C76281924","wikidata":"https://www.wikidata.org/wiki/Q1499526","display_name":"Bid price","level":2,"score":0.42080000042915344},{"id":"https://openalex.org/C163239763","wikidata":"https://www.wikidata.org/wiki/Q5153637","display_name":"Common value auction","level":2,"score":0.41929998993873596},{"id":"https://openalex.org/C171749020","wikidata":"https://www.wikidata.org/wiki/Q4903974","display_name":"Bid shading","level":5,"score":0.4165000021457672},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.39149999618530273},{"id":"https://openalex.org/C46284986","wikidata":"https://www.wikidata.org/wiki/Q5164444","display_name":"Construction bidding","level":5,"score":0.39010000228881836},{"id":"https://openalex.org/C113336015","wikidata":"https://www.wikidata.org/wiki/Q574010","display_name":"Complete information","level":2,"score":0.3874000012874603},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.37119999527931213},{"id":"https://openalex.org/C42475967","wikidata":"https://www.wikidata.org/wiki/Q194292","display_name":"Operations research","level":1,"score":0.3700999915599823},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.3483000099658966},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.33959999680519104},{"id":"https://openalex.org/C8505890","wikidata":"https://www.wikidata.org/wiki/Q605095","display_name":"Budget constraint","level":2,"score":0.3352999985218048},{"id":"https://openalex.org/C115988155","wikidata":"https://www.wikidata.org/wiki/Q3262192","display_name":"Decision problem","level":2,"score":0.33379998803138733},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.3278999924659729},{"id":"https://openalex.org/C175444787","wikidata":"https://www.wikidata.org/wiki/Q39072","display_name":"Microeconomics","level":1,"score":0.3167000114917755},{"id":"https://openalex.org/C10431821","wikidata":"https://www.wikidata.org/wiki/Q6510174","display_name":"Learning effect","level":2,"score":0.3127000033855438},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.31150001287460327},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.305400013923645},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.2930000126361847},{"id":"https://openalex.org/C122308676","wikidata":"https://www.wikidata.org/wiki/Q1331926","display_name":"Decision-making","level":3,"score":0.29109999537467957},{"id":"https://openalex.org/C83426474","wikidata":"https://www.wikidata.org/wiki/Q243758","display_name":"Vickrey auction","level":4,"score":0.28780001401901245},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.27079999446868896},{"id":"https://openalex.org/C59594135","wikidata":"https://www.wikidata.org/wiki/Q5249242","display_name":"Decision model","level":2,"score":0.25929999351501465},{"id":"https://openalex.org/C187687199","wikidata":"https://www.wikidata.org/wiki/Q844880","display_name":"Search advertising","level":4,"score":0.2587999999523163},{"id":"https://openalex.org/C512338625","wikidata":"https://www.wikidata.org/wiki/Q624902","display_name":"Online advertising","level":3,"score":0.2565000057220459},{"id":"https://openalex.org/C112698675","wikidata":"https://www.wikidata.org/wiki/Q37038","display_name":"Advertising","level":1,"score":0.25429999828338623},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.2529999911785126}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3018661.3018702","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3018661.3018702","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Tenth ACM International Conference on Web Search and Data Mining","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1701.02490","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1701.02490","pdf_url":"https://arxiv.org/pdf/1701.02490","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1701.02490","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1701.02490","pdf_url":"https://arxiv.org/pdf/1701.02490","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W1547105496","https://openalex.org/W1763335505","https://openalex.org/W1852264786","https://openalex.org/W1973081445","https://openalex.org/W1973976093","https://openalex.org/W1992556596","https://openalex.org/W2012905273","https://openalex.org/W2021375049","https://openalex.org/W2021866613","https://openalex.org/W2029523065","https://openalex.org/W2031002853","https://openalex.org/W2031528247","https://openalex.org/W2033798573","https://openalex.org/W2039842578","https://openalex.org/W2046513829","https://openalex.org/W2055079831","https://openalex.org/W2073685064","https://openalex.org/W2074694452","https://openalex.org/W2076618162","https://openalex.org/W2092560364","https://openalex.org/W2125074935","https://openalex.org/W2129670787","https://openalex.org/W2149822245","https://openalex.org/W2257979135","https://openalex.org/W2513944453","https://openalex.org/W2515050826","https://openalex.org/W3124857404","https://openalex.org/W6601295022"],"related_works":[],"abstract_inverted_index":{"The":[0,272],"majority":[1],"of":[2,69,74,86,124,145,172],"online":[3],"display":[4,15],"ads":[5],"are":[6],"served":[7],"through":[8],"real-time":[9,21,206,246],"bidding":[10,91,152,236,247],"(RTB)":[11],"---":[12],"each":[13,75,84,111],"ad":[14,35,53,87,95],"impression":[16,54,76],"is":[17,24,40,113,134,144,197,211,262],"auctioned":[18],"off":[19],"in":[20,55,243],"when":[22],"it":[23,39,143],"just":[25],"being":[26],"generated":[27,131],"from":[28,130,253],"a":[29,46,65,80,93,189,227,286],"user":[30],"visit.":[31],"To":[32],"place":[33],"an":[34,52,150,209],"automatically":[36],"and":[37,120,176,203,259,280,294],"optimally,":[38],"critical":[41],"for":[42,92,232],"advertisers":[43],"to":[44,49,83,148,215,238,298],"devise":[45,149],"learning":[47,191,233],"algorithm":[48],"cleverly":[50],"bid":[51,62,81,112,185,213],"real-time.":[56],"Most":[57],"previous":[58],"works":[59],"consider":[60],"the":[61,72,90,105,117,121,125,128,138,157,166,170,174,184,194,200,204,212,219,234,240,244,250,254,281,291],"decision":[63,186],"as":[64,188],"static":[66],"optimization":[67],"problem":[68,252],"either":[70],"treating":[71],"value":[73,267],"independently":[77],"or":[78],"setting":[79],"price":[82,214],"segment":[85],"volume.":[88],"However,":[89],"given":[94],"campaign":[96,126,139,158,260],"would":[97],"repeatedly":[98],"happen":[99],"during":[100],"its":[101],"life":[102],"span":[103],"before":[104],"budget":[106,119,159,261],"runs":[107],"out.":[108],"As":[109],"such,":[110],"strategically":[114],"correlated":[115],"by":[116,199,265],"constrained":[118],"overall":[122],"effectiveness":[123],"(e.g.,":[127],"rewards":[129],"clicks),":[132],"which":[133],"only":[135],"observed":[136],"after":[137],"has":[140],"completed.":[141],"Thus,":[142],"great":[146],"interest":[147],"optimal":[151,235],"strategy":[153],"sequentially":[154],"so":[155],"that":[156],"can":[160],"be":[161],"dynamically":[162],"allocated":[163],"across":[164],"all":[165],"available":[167],"impressions":[168],"on":[169,275,285],"basis":[171],"both":[173],"immediate":[175],"future":[177],"rewards.":[178],"In":[179],"this":[180],"paper,":[181],"we":[182,225],"formulate":[183],"process":[187],"reinforcement":[190],"problem,":[192],"where":[193],"state":[195,220,266],"space":[196],"represented":[198],"auction":[201,223,257],"information":[202],"campaign's":[205],"parameters,":[207],"while":[208],"action":[210],"set.":[216],"By":[217],"modeling":[218],"transition":[221],"via":[222],"competition,":[224],"build":[226],"Markov":[228],"Decision":[229],"Process":[230],"framework":[231],"policy":[237],"optimize":[239],"advertising":[241],"performance":[242,293],"dynamic":[245],"environment.":[248],"Furthermore,":[249],"scalability":[251],"large":[255],"real-world":[256,278],"volume":[258],"well":[263],"handled":[264],"approximation":[268],"using":[269],"neural":[270],"networks.":[271],"empirical":[273],"study":[274],"two":[276],"large-scale":[277],"datasets":[279],"live":[282],"A/B":[283],"testing":[284],"commercial":[287],"platform":[288],"have":[289],"demonstrated":[290],"superior":[292],"high":[295],"efficiency":[296],"compared":[297],"state-of-the-art":[299],"methods.":[300]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":29},{"year":2024,"cited_by_count":22},{"year":2023,"cited_by_count":26},{"year":2022,"cited_by_count":13},{"year":2021,"cited_by_count":21},{"year":2020,"cited_by_count":16},{"year":2019,"cited_by_count":21},{"year":2018,"cited_by_count":7},{"year":2017,"cited_by_count":3}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2017-01-06T00:00:00"}
