{"id":"https://openalex.org/W7162433794","doi":"https://doi.org/10.1145/3788853.3803078","title":"TokaDB: A Unified Storage Engine for Training-Serving Data Management in Large Recommendation Models","display_name":"TokaDB: A Unified Storage Engine for Training-Serving Data Management in Large Recommendation Models","publication_year":2026,"publication_date":"2026-05-26","ids":{"openalex":"https://openalex.org/W7162433794","doi":"https://doi.org/10.1145/3788853.3803078"},"language":null,"primary_location":{"id":"doi:10.1145/3788853.3803078","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3788853.3803078","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion of the International Conference on Management of Data","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3788853.3803078","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5137034119","display_name":"Peng Fang","orcid":"https://orcid.org/0000-0003-4741-9282"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peng Fang","raw_affiliation_strings":["Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":"https://orcid.org/0000-0003-4741-9282","affiliations":[{"raw_affiliation_string":"Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137051145","display_name":"Kelei Guo","orcid":"https://orcid.org/0009-0005-7758-3819"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kelei Guo","raw_affiliation_strings":["Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":"https://orcid.org/0009-0005-7758-3819","affiliations":[{"raw_affiliation_string":"Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137007024","display_name":"Cheng Chen","orcid":"https://orcid.org/0000-0002-4203-2145"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cheng Chen","raw_affiliation_strings":["Bytedance Inc., Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-2622-4075","affiliations":[{"raw_affiliation_string":"Bytedance Inc., Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137062138","display_name":"Wei Zhang","orcid":"https://orcid.org/0009-0008-6192-6612"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei Zhang","raw_affiliation_strings":["Bytedance Inc., Beijing, China"],"raw_orcid":"https://orcid.org/0009-0008-6192-6612","affiliations":[{"raw_affiliation_string":"Bytedance Inc., Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137062762","display_name":"Mingming Chen","orcid":"https://orcid.org/0009-0004-4915-354X"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingming Chen","raw_affiliation_strings":["Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":"https://orcid.org/0009-0004-4915-354X","affiliations":[{"raw_affiliation_string":"Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137054949","display_name":"Huaye Xu","orcid":"https://orcid.org/0009-0007-8598-4051"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huaye Xu","raw_affiliation_strings":["Bytedance Inc., Beijing, China"],"raw_orcid":"https://orcid.org/0009-0007-8598-4051","affiliations":[{"raw_affiliation_string":"Bytedance Inc., Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136998599","display_name":"Borong Meng","orcid":"https://orcid.org/0009-0001-7263-5158"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Borong Meng","raw_affiliation_strings":["Bytedance Inc., Beijing, China"],"raw_orcid":"https://orcid.org/0009-0001-7263-5158","affiliations":[{"raw_affiliation_string":"Bytedance Inc., Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136998985","display_name":"Zongjia Chen","orcid":"https://orcid.org/0009-0006-0762-0888"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zongjia Chen","raw_affiliation_strings":["Bytedance Inc., Beijing, China"],"raw_orcid":"https://orcid.org/0009-0006-0762-0888","affiliations":[{"raw_affiliation_string":"Bytedance Inc., Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137035724","display_name":"Mingshuai Wang","orcid":"https://orcid.org/0009-0008-3686-0789"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mingshuai Wang","raw_affiliation_strings":["Bytedance Inc., Beijing, China"],"raw_orcid":"https://orcid.org/0009-0008-3686-0789","affiliations":[{"raw_affiliation_string":"Bytedance Inc., Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137000554","display_name":"Luping Wang","orcid":"https://orcid.org/0009-0005-4345-9035"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luping Wang","raw_affiliation_strings":["Bytedance Inc., Beijing, China"],"raw_orcid":"https://orcid.org/0009-0005-4345-9035","affiliations":[{"raw_affiliation_string":"Bytedance Inc., Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137072055","display_name":"Yuan Zhang","orcid":"https://orcid.org/0009-0001-4877-473X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuan Zhang","raw_affiliation_strings":["Bytedance Inc., Beijing, China"],"raw_orcid":"https://orcid.org/0009-0001-4877-473X","affiliations":[{"raw_affiliation_string":"Bytedance Inc., Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043214877","display_name":"Shiru Ren","orcid":"https://orcid.org/0000-0002-2430-2009"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shiru Ren","raw_affiliation_strings":["Bytedance Inc., Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-2430-2009","affiliations":[{"raw_affiliation_string":"Bytedance Inc., Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137059710","display_name":"Fang Wang","orcid":"https://orcid.org/0000-0002-2791-4158"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fang Wang","raw_affiliation_strings":["Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":"https://orcid.org/0000-0002-2791-4158","affiliations":[{"raw_affiliation_string":"Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5137022837","display_name":"Dan Feng","orcid":"https://orcid.org/0000-0002-4674-6006"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dan Feng","raw_affiliation_strings":["Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":"https://orcid.org/0000-0002-4674-6006","affiliations":[{"raw_affiliation_string":"Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":14,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.86486892,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"293","last_page":"306"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.5424000024795532,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.5424000024795532,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.03269999846816063,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.028200000524520874,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/data-management","display_name":"Data management","score":0.39419999718666077},{"id":"https://openalex.org/keywords/work","display_name":"Work (physics)","score":0.3107999861240387},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.2888999879360199},{"id":"https://openalex.org/keywords/computer-data-storage","display_name":"Computer data storage","score":0.2874000072479248},{"id":"https://openalex.org/keywords/management-system","display_name":"Management system","score":0.2858000099658966},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.27379998564720154}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6273999810218811},{"id":"https://openalex.org/C1668388","wikidata":"https://www.wikidata.org/wiki/Q1149776","display_name":"Data management","level":2,"score":0.39419999718666077},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.36550000309944153},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3409000039100647},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.3107999861240387},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2888999879360199},{"id":"https://openalex.org/C194739806","wikidata":"https://www.wikidata.org/wiki/Q66221","display_name":"Computer data storage","level":2,"score":0.2874000072479248},{"id":"https://openalex.org/C198783460","wikidata":"https://www.wikidata.org/wiki/Q629173","display_name":"Management system","level":2,"score":0.2858000099658966},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.27379998564720154},{"id":"https://openalex.org/C133462117","wikidata":"https://www.wikidata.org/wiki/Q4929239","display_name":"Data collection","level":2,"score":0.2718000113964081},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.26660001277923584},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.25859999656677246},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.2533999979496002}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3788853.3803078","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3788853.3803078","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion of the International Conference on Management of Data","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3788853.3803078","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3788853.3803078","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Companion of the International Conference on Management of Data","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W1791587242","https://openalex.org/W2005112390","https://openalex.org/W2085601491","https://openalex.org/W2164705534","https://openalex.org/W2426624872","https://openalex.org/W2512971201","https://openalex.org/W2534983812","https://openalex.org/W2594680891","https://openalex.org/W2973172293","https://openalex.org/W3093519337","https://openalex.org/W3207452942","https://openalex.org/W4235336266","https://openalex.org/W4237212714","https://openalex.org/W4288083766","https://openalex.org/W4385562613","https://openalex.org/W4386768921","https://openalex.org/W4387321091","https://openalex.org/W4391054874","https://openalex.org/W4391136507","https://openalex.org/W4396220739","https://openalex.org/W4400528024","https://openalex.org/W4400910425","https://openalex.org/W4401212200","https://openalex.org/W4401834466","https://openalex.org/W4403218830","https://openalex.org/W4403220611","https://openalex.org/W4403577794","https://openalex.org/W4404181190","https://openalex.org/W4409366070","https://openalex.org/W4412377020","https://openalex.org/W4414034912","https://openalex.org/W4414034934","https://openalex.org/W4414241786","https://openalex.org/W4415796681"],"related_works":[],"abstract_inverted_index":{"Large":[0],"recommendation":[1,15],"models":[2],"(LRM),":[3],"powered":[4],"by":[5,108,206],"Transformer":[6],"architectures,":[7],"are":[8],"rapidly":[9],"becoming":[10],"the":[11,132],"backbone":[12],"of":[13,153],"modern":[14],"systems.":[16],"However,":[17],"their":[18],"training-serving":[19],"lifecycle":[20],"imposes":[21],"unprecedented":[22],"challenges":[23,57],"on":[24],"data":[25,88,107,114],"management:":[26],"hybrid":[27],"access":[28],"patterns":[29],"for":[30,36,76],"user":[31,82],"behavior":[32,83],"sequences,":[33],"cost-performance":[34],"trade-offs":[35],"exabyte-scale":[37],"KV":[38,86],"Cache":[39,87],"data,":[40],"and":[41,49,85,91,110,121,134,156,164,177,198],"resource":[42],"contention":[43],"between":[44],"prioritized":[45,161],"workloads":[46],"during":[47],"training":[48,90],"serving.":[50],"Existing":[51],"approaches":[52],"fail":[53],"to":[54,60,115,137,159],"address":[55],"these":[56],"simultaneously,":[58],"leading":[59],"performance":[61,203],"bottlenecks":[62],"or":[63],"excessive":[64],"costs.":[65],"In":[66],"this":[67],"paper,":[68],"we":[69],"present":[70],"TokaDB,":[71],"a":[72],"unified":[73],"storage":[74],"engine":[75],"LRM":[77,181],"that":[78,104,130,149,186],"efficiently":[79],"manages":[80],"both":[81,117],"sequences":[84],"across":[89,172],"serving":[92],"stages.":[93],"TokaDB":[94,168,187],"introduces":[95],"three":[96],"core":[97],"innovations:":[98],"(1)":[99],"Hybrid-Access":[100],"Specialized":[101],"Data":[102],"Organization":[103],"segments":[105],"sequence":[106],"time":[109],"decouples":[111],"indexes":[112],"from":[113],"support":[116],"high-throughput":[118],"point":[119],"reads":[120],"high-bandwidth":[122],"range":[123],"scans;":[124],"(2)":[125],"Cost-Efficient":[126],"Cooperative":[127],"I/O":[128,157],"Engine":[129],"co-optimizes":[131],"read":[133],"write":[135],"paths":[136],"reduce":[138],"tail":[139],"latency":[140],"while":[141],"sustaining":[142],"high":[143],"throughput;":[144],"(3)":[145],"Priority-Based":[146],"Resource":[147],"Manager":[148],"enforces":[150],"priority-aware":[151],"sharing":[152],"CPU,":[154],"cache,":[155],"resources":[158],"eliminate":[160],"workload":[162],"interference":[163],"ensure":[165],"predictable":[166],"performance.":[167],"has":[169],"been":[170],"deployed":[171],"production":[173],"services":[174],"at":[175],"ByteDance":[176],"validated":[178],"under":[179],"real":[180],"workloads.":[182],"Experimental":[183],"results":[184],"show":[185],"achieves":[188],"4-10\u00d7":[189],"end-to-end":[190],"speedups":[191],"over":[192],"mainstream":[193],"systems":[194],"including":[195],"HBase,":[196],"Doris,":[197],"Doris+,":[199],"satisfying":[200],"all":[201],"critical":[202],"dimensions":[204],"required":[205],"LRM.":[207]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-27T00:00:00"}
