{"id":"https://openalex.org/W2963654596","doi":"https://doi.org/10.1609/aaai.v33i01.33014902","title":"Virtual-Taobao: Virtualizing Real-World Online Retail Environment for Reinforcement Learning","display_name":"Virtual-Taobao: Virtualizing Real-World Online Retail Environment for Reinforcement Learning","publication_year":2019,"publication_date":"2019-07-17","ids":{"openalex":"https://openalex.org/W2963654596","doi":"https://doi.org/10.1609/aaai.v33i01.33014902","mag":"2963654596"},"language":"en","primary_location":{"id":"doi:10.1609/aaai.v33i01.33014902","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v33i01.33014902","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/4419/4297","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/4419/4297","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114321321","display_name":"Jing-Cheng Shi","orcid":"https://orcid.org/0000-0002-3688-4069"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jing-Cheng Shi","raw_affiliation_strings":["Nanjing University"],"affiliations":[{"raw_affiliation_string":"Nanjing University","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101458462","display_name":"Yang Yu","orcid":"https://orcid.org/0000-0002-1052-5447"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Yu","raw_affiliation_strings":["Nanjing University"],"affiliations":[{"raw_affiliation_string":"Nanjing University","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070849903","display_name":"Qing Da","orcid":"https://orcid.org/0000-0003-2200-0098"},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Qing Da","raw_affiliation_strings":["Alibaba Group"],"affiliations":[{"raw_affiliation_string":"Alibaba Group","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101922392","display_name":"Shiyong Chen","orcid":"https://orcid.org/0009-0005-6293-7792"},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shi-Yong Chen","raw_affiliation_strings":["Alibaba Group"],"affiliations":[{"raw_affiliation_string":"Alibaba Group","institution_ids":["https://openalex.org/I4210095624"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5076875538","display_name":"Anxiang Zeng","orcid":"https://orcid.org/0000-0003-3869-5357"},"institutions":[{"id":"https://openalex.org/I4210095624","display_name":"Alibaba Group (United States)","ror":"https://ror.org/00rn0m335","country_code":"US","type":"company","lineage":["https://openalex.org/I4210095624","https://openalex.org/I45928872"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"An-Xiang Zeng","raw_affiliation_strings":["Alibaba Group"],"affiliations":[{"raw_affiliation_string":"Alibaba Group","institution_ids":["https://openalex.org/I4210095624"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5114321321"],"corresponding_institution_ids":["https://openalex.org/I881766915"],"apc_list":null,"apc_paid":null,"fwci":9.3105,"has_fulltext":true,"cited_by_count":143,"citation_normalized_percentile":{"value":0.98041533,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":"33","issue":"01","first_page":"4902","last_page":"4909"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9596999883651733,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9596999883651733,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9473999738693237,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11326","display_name":"Stock Market Forecasting Methods","score":0.9466000199317932,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8986325263977051},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7955853343009949},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.46779781579971313},{"id":"https://openalex.org/keywords/overfitting","display_name":"Overfitting","score":0.4523315727710724},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.43678203225135803},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.16545099020004272}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8986325263977051},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7955853343009949},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46779781579971313},{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.4523315727710724},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.43678203225135803},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.16545099020004272}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v33i01.33014902","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v33i01.33014902","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/4419/4297","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v33i01.33014902","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v33i01.33014902","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/4419/4297","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","score":0.5400000214576721,"id":"https://metadata.un.org/sdg/9"}],"awards":[{"id":"https://openalex.org/G1121271761","display_name":null,"funder_award_id":"Program","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3867600133","display_name":null,"funder_award_id":"2017001","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G391238517","display_name":null,"funder_award_id":", and","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5469316478","display_name":null,"funder_award_id":"61876077","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5939423041","display_name":null,"funder_award_id":"Technology","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7174558747","display_name":null,"funder_award_id":"Group","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2963654596.pdf","grobid_xml":"https://content.openalex.org/works/W2963654596.grobid-xml"},"referenced_works_count":29,"referenced_works":["https://openalex.org/W1771410628","https://openalex.org/W1931877416","https://openalex.org/W1983217982","https://openalex.org/W1986014385","https://openalex.org/W2051228319","https://openalex.org/W2090373601","https://openalex.org/W2142641780","https://openalex.org/W2166302491","https://openalex.org/W2227909145","https://openalex.org/W2257979135","https://openalex.org/W2260756217","https://openalex.org/W2290104316","https://openalex.org/W2560678327","https://openalex.org/W2580495915","https://openalex.org/W2591957724","https://openalex.org/W2601322194","https://openalex.org/W2790532312","https://openalex.org/W2808980893","https://openalex.org/W2963064439","https://openalex.org/W2963277051","https://openalex.org/W2964043796","https://openalex.org/W4298857966","https://openalex.org/W4320013936","https://openalex.org/W4394670483","https://openalex.org/W6638018090","https://openalex.org/W6646884813","https://openalex.org/W6673066118","https://openalex.org/W6680724558","https://openalex.org/W6718092244"],"related_works":["https://openalex.org/W2989932438","https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W2186333919","https://openalex.org/W4387297750","https://openalex.org/W3046775127","https://openalex.org/W3107602296","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W4313488044"],"abstract_inverted_index":{"Applying":[0],"reinforcement":[1,24,38,66,222],"learning":[2,25,39,67,223],"in":[3,27,44,68,92,189,224],"physical-world":[4],"tasks":[5],"is":[6,10,156],"extremely":[7],"challenging.":[8],"It":[9],"commonly":[11],"infeasible":[12],"to":[13,148,203],"sample":[14],"a":[15,28,55,59,79],"large":[16],"number":[17],"of":[18,47,64,138,160,162,177],"trials,":[19],"as":[20],"required":[21],"by":[22],"current":[23],"methods,":[26],"physical":[29,56,96,194,226],"environment.":[30,180],"This":[31],"paper":[32],"reports":[33],"our":[34,73],"project":[35],"on":[36,220],"using":[37],"for":[40,108,111,126],"better":[41,116,128],"commodity":[42],"search":[43],"Taobao,":[45,171],"one":[46],"the":[48,101,136,139,150,169,178,185,204],"largest":[49],"online":[50,209],"retail":[51],"platforms":[52],"and":[53,87],"meanwhile":[54],"environment":[57],"with":[58,94,115,168],"high":[60],"sampling":[61,97,195],"cost.":[62],"Instead":[63],"training":[65],"Taobao":[69,164],"directly,":[70],"we":[71,76,89,104,119,141],"present":[72],"environment-building":[74],"approach:":[75],"build":[77],"Virtual-Taobao,":[78,190],"simulator":[80],"learned":[81],"from":[82,158],"historical":[83],"customer":[84,112,130],"behavior":[85],"data,":[86],"then":[88],"train":[90],"policies":[91,186],"Virtual-Taobao":[93,155,172],"no":[95],"costs.":[98],"To":[99,132],"improve":[100],"simulation":[102],"precision,":[103],"propose":[105,120,142],"GAN-SD":[106],"(GAN":[107],"Simulating":[109],"Distributions)":[110],"feature":[113],"generation":[114],"matched":[117],"distribution;":[118],"MAIL":[121],"(Multiagent":[122],"Adversarial":[123],"Imitation":[124],"Learning)":[125],"generating":[127],"generalizable":[129],"actions.":[131],"further":[133,182],"avoid":[134],"overfitting":[135],"imperfection":[137],"simulator,":[140],"ANC":[143],"(Action":[144],"Norm":[145],"Constraint)":[146],"strategy":[147],"regularize":[149],"policy":[151],"model.":[152],"In":[153],"experiments,":[154],"trained":[157,187],"hundreds":[159],"millions":[161],"real":[163,170,179],"customers\u2019":[165],"records.":[166],"Compared":[167],"faithfully":[173],"recovers":[174],"important":[175],"properties":[176],"We":[181,212],"show":[183],"that":[184],"purely":[188],"which":[191],"has":[192],"zero":[193],"cost,":[196],"can":[197],"have":[198],"significantly":[199],"superior":[200],"real-world":[201],"performance":[202],"traditional":[205],"supervised":[206],"approaches,":[207],"through":[208],"A/B":[210],"tests.":[211],"hope":[213],"this":[214],"work":[215],"may":[216],"shed":[217],"some":[218],"light":[219],"applying":[221],"complex":[225],"environments.":[227]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":13},{"year":2024,"cited_by_count":25},{"year":2023,"cited_by_count":32},{"year":2022,"cited_by_count":19},{"year":2021,"cited_by_count":24},{"year":2020,"cited_by_count":18},{"year":2019,"cited_by_count":9},{"year":2018,"cited_by_count":2}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
