{"id":"https://openalex.org/W4291125977","doi":"https://doi.org/10.1145/3534678.3539095","title":"Reinforcement Learning in the Wild: Scalable RL Dispatching Algorithm Deployed in Ridehailing Marketplace","display_name":"Reinforcement Learning in the Wild: Scalable RL Dispatching Algorithm Deployed in Ridehailing Marketplace","publication_year":2022,"publication_date":"2022-08-12","ids":{"openalex":"https://openalex.org/W4291125977","doi":"https://doi.org/10.1145/3534678.3539095"},"language":"en","primary_location":{"id":"doi:10.1145/3534678.3539095","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3534678.3539095","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3534678.3539095","source":{"id":"https://openalex.org/S4363608767","display_name":"Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3534678.3539095","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058487746","display_name":"Soheil Sadeghi Eshkevari","orcid":"https://orcid.org/0000-0001-9285-6911"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Soheil Sadeghi Eshkevari","raw_affiliation_strings":["DiDi Labs, Mountain View, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"DiDi Labs, Mountain View, CA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101670036","display_name":"Xiaocheng Tang","orcid":"https://orcid.org/0000-0003-1009-9879"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiaocheng Tang","raw_affiliation_strings":["DiDi Labs, Mountain View, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"DiDi Labs, Mountain View, CA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085579946","display_name":"Zhiwei Qin","orcid":"https://orcid.org/0000-0001-5383-4816"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhiwei Qin","raw_affiliation_strings":["DiDi Labs, Mountain View, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"DiDi Labs, Mountain View, CA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085507457","display_name":"Jinhan Mei","orcid":null},"institutions":[{"id":"https://openalex.org/I4401726870","display_name":"Didi Chuxing (China)","ror":"https://ror.org/02ksqcf75","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726870"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinhan Mei","raw_affiliation_strings":["DiDi Chuxing, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"DiDi Chuxing, Beijing, China","institution_ids":["https://openalex.org/I4401726870"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100439650","display_name":"Cheng Zhang","orcid":"https://orcid.org/0000-0002-5803-2860"},"institutions":[{"id":"https://openalex.org/I4401726870","display_name":"Didi Chuxing (China)","ror":"https://ror.org/02ksqcf75","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726870"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Cheng Zhang","raw_affiliation_strings":["DiDi Chuxing, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"DiDi Chuxing, Beijing, China","institution_ids":["https://openalex.org/I4401726870"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035296344","display_name":"Qianying Meng","orcid":null},"institutions":[{"id":"https://openalex.org/I4401726870","display_name":"Didi Chuxing (China)","ror":"https://ror.org/02ksqcf75","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726870"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qianying Meng","raw_affiliation_strings":["DiDi Chuxing, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"DiDi Chuxing, Beijing, China","institution_ids":["https://openalex.org/I4401726870"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100380922","display_name":"Jia Xu","orcid":"https://orcid.org/0009-0001-4868-825X"},"institutions":[{"id":"https://openalex.org/I4401726870","display_name":"Didi Chuxing (China)","ror":"https://ror.org/02ksqcf75","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726870"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jia Xu","raw_affiliation_strings":["DiDi Chuxing, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"DiDi Chuxing, Beijing, China","institution_ids":["https://openalex.org/I4401726870"]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":4.0445,"has_fulltext":true,"cited_by_count":23,"citation_normalized_percentile":{"value":0.96507115,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"3838","last_page":"3848"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11942","display_name":"Transportation and Mobility Innovations","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11942","display_name":"Transportation and Mobility Innovations","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10768","display_name":"Electric Vehicles and Infrastructure","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9922999739646912,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8623262643814087},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7932013869285583},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.7750883102416992},{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.6135861873626709},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.5924633145332336},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4529383182525635},{"id":"https://openalex.org/keywords/performance-improvement","display_name":"Performance improvement","score":0.43507322669029236},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.42980724573135376},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.40953901410102844},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09894675016403198}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8623262643814087},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7932013869285583},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.7750883102416992},{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.6135861873626709},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.5924633145332336},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4529383182525635},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.43507322669029236},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.42980724573135376},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40953901410102844},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09894675016403198},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3534678.3539095","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3534678.3539095","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3534678.3539095","source":{"id":"https://openalex.org/S4363608767","display_name":"Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3534678.3539095","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3534678.3539095","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3534678.3539095","source":{"id":"https://openalex.org/S4363608767","display_name":"Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.4399999976158142,"id":"https://metadata.un.org/sdg/1","display_name":"No poverty"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4291125977.pdf","grobid_xml":"https://content.openalex.org/works/W4291125977.grobid-xml"},"referenced_works_count":10,"referenced_works":["https://openalex.org/W2112420033","https://openalex.org/W2745284275","https://openalex.org/W2803156588","https://openalex.org/W2811315250","https://openalex.org/W2909452395","https://openalex.org/W2952281591","https://openalex.org/W3122391994","https://openalex.org/W3166344325","https://openalex.org/W3172786397","https://openalex.org/W3184771397"],"related_works":["https://openalex.org/W2905433371","https://openalex.org/W4390569940","https://openalex.org/W2888392564","https://openalex.org/W4361193272","https://openalex.org/W4310278675","https://openalex.org/W4388422664","https://openalex.org/W2806259446","https://openalex.org/W2963326959","https://openalex.org/W4247136043","https://openalex.org/W4312407344"],"abstract_inverted_index":{"In":[0,122,175,214,262],"this":[1,68],"study,":[2],"a":[3,95,124,152],"scalable":[4,88],"and":[5,15,51,87,110,114,173,209,230],"real-time":[6],"dispatching":[7,26,43,98],"algorithm":[8,217,250],"based":[9,33,130,161,191],"on":[10,34,131,162,192],"reinforcement":[11],"learning":[12,41,113],"is":[13,20,101,134,137,156,197,201,218,233,277],"proposed":[14,135,157],"for":[16,77,119,177,227],"the":[17,55,141,145,148,163,166,179,193,216,238,243],"first":[18],"time,":[19],"deployed":[21,219,249],"in":[22,28,67,80,169,221,235,255,273],"large":[23],"scale.":[24],"Current":[25],"methods":[27],"ridehailing":[29],"platforms":[30],"are":[31,46],"dominantly":[32],"myopic":[35],"or":[36],"rule-based":[37],"non-myopic":[38],"approaches.":[39],"Reinforcement":[40],"enables":[42],"policies":[44],"that":[45,100,136,158],"informed":[47],"of":[48,61,82,127,144,165,181,237,246],"historical":[49],"data":[50,208],"able":[52],"to":[53,58,107,140],"employ":[54],"learned":[56],"information":[57],"optimize":[59],"returns":[60],"expected":[62],"future":[63],"trajectories.":[64],"Previous":[65],"studies":[66],"field":[69],"yielded":[70],"promising":[71],"results,":[72],"yet":[73],"have":[74],"left":[75],"room":[76],"further":[78],"improvements":[79],"terms":[81],"performance":[83,171,212,275],"gain,":[84],"self-dependency,":[85],"transferability,":[86],"deployment":[89],"mechanisms.":[90],"The":[91,199,248],"present":[92],"study":[93],"proposes":[94],"standalone":[96],"RL-based":[97],"solution":[99],"equipped":[102],"with":[103,206],"multiple":[104,222],"novel":[105],"mechanisms":[106],"ensure":[108],"robust":[109],"efficient":[111],"on-policy":[112],"inference":[115,266],"while":[116],"being":[117],"adaptable":[118],"full-scale":[120,280],"deployment.":[121,281],"particular,":[123],"new":[125],"form":[126],"value":[128],"updating":[129],"temporal":[132],"difference":[133],"more":[138,231],"adapted":[139],"inherent":[142],"uncertainty":[143],"problem.":[146],"For":[147],"driver-order":[149],"assignment":[150],"problem,":[151],"customized":[153],"utility":[154],"function":[155],"when":[159],"tuned":[160],"statistics":[164],"market,":[167],"results":[168],"remarkable":[170],"improvement":[172,254,272],"interpretability.":[174],"addition,":[176,215,263],"reducing":[178],"risk":[180],"cancellation":[182],"after":[183,279],"drivers'":[184],"assignment,":[185],"an":[186],"adaptive":[187],"graph":[188],"pruning":[189],"strategy":[190],"multi-arm":[194],"bandit":[195],"problem":[196],"introduced.":[198],"method":[200],"evaluated":[202],"using":[203],"offline":[204],"simulation":[205],"real":[207],"yields":[210],"notable":[211],"improvement.":[213],"online":[220],"cities":[223],"under":[224],"DiDi's":[225],"operation":[226],"A/B":[228,260],"testing":[229],"recently,":[232],"launched":[234],"one":[236],"major":[239,274],"international":[240],"markets":[241],"as":[242,268,270],"primary":[244],"mode":[245],"dispatch.":[247],"shows":[251],"over":[252],"1.3%":[253],"total":[256],"driver":[257],"income":[258],"from":[259],"testing.":[261],"by":[264],"causal":[265],"analysis,":[267],"much":[269],"5.3%":[271],"metrics":[276],"detected":[278]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":11},{"year":2023,"cited_by_count":3}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
