{"id":"https://openalex.org/W4401857395","doi":"https://doi.org/10.1145/3637528.3671555","title":"Offline Reinforcement Learning for Optimizing Production Bidding Policies","display_name":"Offline Reinforcement Learning for Optimizing Production Bidding Policies","publication_year":2024,"publication_date":"2024-08-24","ids":{"openalex":"https://openalex.org/W4401857395","doi":"https://doi.org/10.1145/3637528.3671555"},"language":"en","primary_location":{"id":"doi:10.1145/3637528.3671555","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3637528.3671555","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3637528.3671555","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3637528.3671555","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5106708679","display_name":"Dmytro Korenkevych","orcid":null},"institutions":[{"id":"https://openalex.org/I4210099336","display_name":"Menlo School","ror":"https://ror.org/01240pn49","country_code":"US","type":"education","lineage":["https://openalex.org/I4210099336"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Dmytro Korenkevych","raw_affiliation_strings":["AI at Meta, Menlo Park, USA"],"affiliations":[{"raw_affiliation_string":"AI at Meta, Menlo Park, USA","institution_ids":["https://openalex.org/I4210099336"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100885139","display_name":"Frank Cheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Frank Cheng","raw_affiliation_strings":["AI at Meta, Sunnyvale, USA"],"affiliations":[{"raw_affiliation_string":"AI at Meta, Sunnyvale, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067823854","display_name":"Artsiom Balakir","orcid":null},"institutions":[{"id":"https://openalex.org/I4210099336","display_name":"Menlo School","ror":"https://ror.org/01240pn49","country_code":"US","type":"education","lineage":["https://openalex.org/I4210099336"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Artsiom Balakir","raw_affiliation_strings":["AI at Meta, Menlo Park, USA"],"affiliations":[{"raw_affiliation_string":"AI at Meta, Menlo Park, USA","institution_ids":["https://openalex.org/I4210099336"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092728750","display_name":"Alex Nikulkov","orcid":"https://orcid.org/0009-0008-2335-289X"},"institutions":[{"id":"https://openalex.org/I4210108985","display_name":"Bellevue Hospital Center","ror":"https://ror.org/01ky34z31","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1283621791","https://openalex.org/I4210086933","https://openalex.org/I4210108985"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alex Nikulkov","raw_affiliation_strings":["AI at Meta, Bellevue, USA"],"affiliations":[{"raw_affiliation_string":"AI at Meta, Bellevue, USA","institution_ids":["https://openalex.org/I4210108985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106708680","display_name":"Lingnan Gao","orcid":null},"institutions":[{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lingnan Gao","raw_affiliation_strings":["Meta Platform Inc., Menlo Park, USA"],"affiliations":[{"raw_affiliation_string":"Meta Platform Inc., Menlo Park, USA","institution_ids":["https://openalex.org/I4210114444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042631901","display_name":"Zhihao Cen","orcid":"https://orcid.org/0009-0009-5967-3895"},"institutions":[{"id":"https://openalex.org/I4210099336","display_name":"Menlo School","ror":"https://ror.org/01240pn49","country_code":"US","type":"education","lineage":["https://openalex.org/I4210099336"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhihao Cen","raw_affiliation_strings":["AI at Meta, Menlo Park, USA"],"affiliations":[{"raw_affiliation_string":"AI at Meta, Menlo Park, USA","institution_ids":["https://openalex.org/I4210099336"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049517124","display_name":"Zuobing Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zuobing Xu","raw_affiliation_strings":["Meta Platform Inc., Menlo Park, USA"],"affiliations":[{"raw_affiliation_string":"Meta Platform Inc., Menlo Park, USA","institution_ids":["https://openalex.org/I4210114444"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5050765125","display_name":"Zheqing Zhu","orcid":"https://orcid.org/0000-0002-1162-106X"},"institutions":[{"id":"https://openalex.org/I4210108985","display_name":"Bellevue Hospital Center","ror":"https://ror.org/01ky34z31","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1283621791","https://openalex.org/I4210086933","https://openalex.org/I4210108985"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zheqing Zhu","raw_affiliation_strings":["AI at Meta, Bellevue, USA"],"affiliations":[{"raw_affiliation_string":"AI at Meta, Bellevue, USA","institution_ids":["https://openalex.org/I4210108985"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5106708679"],"corresponding_institution_ids":["https://openalex.org/I4210099336"],"apc_list":null,"apc_paid":null,"fwci":0.3492,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.62990161,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"5251","last_page":"5259"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10551","display_name":"Scheduling and Optimization Algorithms","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10551","display_name":"Scheduling and Optimization Algorithms","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11814","display_name":"Advanced Manufacturing and Logistics Optimization","score":0.9804999828338623,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10328","display_name":"Supply Chain and Inventory Management","score":0.9793999791145325,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bidding","display_name":"Bidding","score":0.8942543864250183},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8886563777923584},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6889672875404358},{"id":"https://openalex.org/keywords/production","display_name":"Production (economics)","score":0.6669731140136719},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.4283779561519623},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.31671032309532166},{"id":"https://openalex.org/keywords/business","display_name":"Business","score":0.14124786853790283},{"id":"https://openalex.org/keywords/microeconomics","display_name":"Microeconomics","score":0.13656070828437805},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.11480012536048889},{"id":"https://openalex.org/keywords/marketing","display_name":"Marketing","score":0.08772164583206177},{"id":"https://openalex.org/keywords/economics","display_name":"Economics","score":0.0719466507434845}],"concepts":[{"id":"https://openalex.org/C9233905","wikidata":"https://www.wikidata.org/wiki/Q3276328","display_name":"Bidding","level":2,"score":0.8942543864250183},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8886563777923584},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6889672875404358},{"id":"https://openalex.org/C2778348673","wikidata":"https://www.wikidata.org/wiki/Q739302","display_name":"Production (economics)","level":2,"score":0.6669731140136719},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.4283779561519623},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.31671032309532166},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.14124786853790283},{"id":"https://openalex.org/C175444787","wikidata":"https://www.wikidata.org/wiki/Q39072","display_name":"Microeconomics","level":1,"score":0.13656070828437805},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.11480012536048889},{"id":"https://openalex.org/C162853370","wikidata":"https://www.wikidata.org/wiki/Q39809","display_name":"Marketing","level":1,"score":0.08772164583206177},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0719466507434845},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3637528.3671555","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3637528.3671555","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3637528.3671555","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3637528.3671555","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3637528.3671555","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3637528.3671555","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4401857395.pdf"},"referenced_works_count":28,"referenced_works":["https://openalex.org/W1969591148","https://openalex.org/W1973081445","https://openalex.org/W1973976093","https://openalex.org/W2021375049","https://openalex.org/W2031528247","https://openalex.org/W2039842578","https://openalex.org/W2149822245","https://openalex.org/W2290203315","https://openalex.org/W2562337727","https://openalex.org/W2592434961","https://openalex.org/W2788376840","https://openalex.org/W2945611146","https://openalex.org/W2963393294","https://openalex.org/W2963841569","https://openalex.org/W2990747716","https://openalex.org/W2998145016","https://openalex.org/W2998338655","https://openalex.org/W3033324992","https://openalex.org/W3099947523","https://openalex.org/W3101243714","https://openalex.org/W3166393923","https://openalex.org/W3183892796","https://openalex.org/W3198114340","https://openalex.org/W4212774754","https://openalex.org/W4220737776","https://openalex.org/W4281756788","https://openalex.org/W4306311772","https://openalex.org/W4367046927"],"related_works":["https://openalex.org/W2355561715","https://openalex.org/W2355326491","https://openalex.org/W2389286292","https://openalex.org/W2360751371","https://openalex.org/W2387920521","https://openalex.org/W2389754756","https://openalex.org/W2382224273","https://openalex.org/W2373538886","https://openalex.org/W2360290312","https://openalex.org/W2369836678"],"abstract_inverted_index":{"The":[0],"online":[1],"advertising":[2,29],"market,":[3],"with":[4,82,151,225],"its":[5],"thousands":[6],"of":[7,79,217],"auctions":[8],"run":[9],"per":[10],"second,":[11],"presents":[12],"a":[13,25,70,87,118,142],"daunting":[14],"challenge":[15],"for":[16,45],"advertisers":[17],"who":[18],"wish":[19],"to":[20,35,43,66,74,90,111,194],"optimize":[21,112],"their":[22,36,41],"spend":[23],"under":[24],"budget":[26],"constraint.":[27],"Thus,":[28],"platforms":[30],"typically":[31],"provide":[32],"automated":[33],"agents":[34,56],"customers,":[37],"which":[38,124],"act":[39],"on":[40,122],"behalf":[42],"bid":[44],"impression":[46],"opportunities":[47],"in":[48,94,185],"real":[49,100],"time":[50],"at":[51],"scale.":[52],"Because":[53],"these":[54],"proxy":[55],"are":[57,162],"owned":[58],"by":[59,97,135],"the":[60,80,125,136,157,166,195,215],"platform":[61],"but":[62],"use":[63,141],"advertiser":[64,126],"funds":[65],"operate,":[67],"there":[68],"is":[69,170],"strong":[71],"practical":[72],"need":[73],"balance":[75],"reliability":[76],"and":[77,130,165,188],"explainability":[78,209],"agent":[81,144],"optimizing":[83,91],"power.":[84],"We":[85,140,174],"propose":[86],"generalizable":[88],"approach":[89,107,201],"bidding":[92,191,198],"policies":[93,150],"production":[95,190,197,219],"environments":[96,192],"learning":[98],"from":[99],"data":[101,133],"using":[102],"offline":[103],"reinforcement":[104],"learning.":[105],"This":[106],"can":[108,127],"be":[109],"used":[110],"any":[113],"differentiable":[114],"base":[115,137,149,159],"policy":[116,120,138,160],"(practically,":[117],"heuristic":[119],"based":[121],"principles":[123],"easily":[128],"understand),":[129],"only":[131,156],"requires":[132],"generated":[134],"itself.":[139],"hybrid":[143],"architecture":[145,179],"that":[146,176],"combines":[147],"arbitrary":[148],"deep":[152],"neural":[153,167,230],"networks,":[154],"where":[155],"optimized":[158],"parameters":[161,216],"eventually":[163],"deployed,":[164],"network":[168],"part":[169],"discarded":[171],"after":[172],"training.":[173],"demonstrate":[175],"such":[177],"an":[178],"achieves":[180],"statistically":[181],"significant":[182],"performance":[183],"gains":[184],"both":[186],"simulated":[187],"at-scale":[189],"compared":[193],"default":[196],"policy.":[199],"Our":[200],"does":[202],"not":[203],"incur":[204],"additional":[205],"infrastructure,":[206],"safety,":[207],"or":[208],"costs,":[210],"as":[211],"it":[212],"directly":[213],"optimizes":[214],"existing":[218],"routines":[220],"without":[221],"necessarily":[222],"replacing":[223],"them":[224],"black":[226],"box-style":[227],"models":[228],"like":[229],"networks.":[231]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
