{"id":"https://openalex.org/W3101434632","doi":"https://doi.org/10.1109/tkde.2020.3038109","title":"CuWide: Towards Efficient Flow-Based Training for Sparse Wide Models on GPUs","display_name":"CuWide: Towards Efficient Flow-Based Training for Sparse Wide Models on GPUs","publication_year":2020,"publication_date":"2020-11-16","ids":{"openalex":"https://openalex.org/W3101434632","doi":"https://doi.org/10.1109/tkde.2020.3038109","mag":"3101434632"},"language":"en","primary_location":{"id":"doi:10.1109/tkde.2020.3038109","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2020.3038109","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5015552951","display_name":"Xupeng Miao","orcid":"https://orcid.org/0000-0002-9371-8358"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xupeng Miao","raw_affiliation_strings":["Key Lab of High Confidence Software Technologies (MOE), School of EECS, Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Key Lab of High Confidence Software Technologies (MOE), School of EECS, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102874660","display_name":"Lingxiao Ma","orcid":"https://orcid.org/0009-0009-9524-5476"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lingxiao Ma","raw_affiliation_strings":["Key Lab of High Confidence Software Technologies (MOE), School of EECS, Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Key Lab of High Confidence Software Technologies (MOE), School of EECS, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102862758","display_name":"Zhi Yang","orcid":"https://orcid.org/0000-0002-8219-4499"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhi Yang","raw_affiliation_strings":["Key Lab of High Confidence Software Technologies (MOE), School of EECS, Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Key Lab of High Confidence Software Technologies (MOE), School of EECS, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014615052","display_name":"Yingxia Shao","orcid":"https://orcid.org/0000-0002-8559-2628"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yingxia Shao","raw_affiliation_strings":["School of Computer Science, Beijing University of Posts and Telecommunications, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Beijing University of Posts and Telecommunications, Beijing, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062357883","display_name":"Bin Cui","orcid":"https://orcid.org/0000-0003-1681-4677"},"institutions":[{"id":"https://openalex.org/I4210128818","display_name":"Institute of Software","ror":"https://ror.org/033dfsn42","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818"]},{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bin Cui","raw_affiliation_strings":["Key Lab of High Confidence Software Technologies (MOE), School of EECS, Institute of Computational Social Science, Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Key Lab of High Confidence Software Technologies (MOE), School of EECS, Institute of Computational Social Science, Peking University, Beijing, China","institution_ids":["https://openalex.org/I4210128818","https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040157606","display_name":"Lele Yu","orcid":"https://orcid.org/0000-0001-9019-9532"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lele Yu","raw_affiliation_strings":["Tencent Inc., Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tencent Inc., Shenzhen, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102918834","display_name":"Jiawei Jiang","orcid":"https://orcid.org/0000-0003-0051-0046"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Jiawei Jiang","raw_affiliation_strings":["ETH Zurich, Zurich, Switzerland"],"affiliations":[{"raw_affiliation_string":"ETH Zurich, Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5015552951"],"corresponding_institution_ids":["https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":1.3876,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.83200175,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"34","issue":"9","first_page":"4119","last_page":"4132"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9139889478683472},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5739396214485168},{"id":"https://openalex.org/keywords/memory-hierarchy","display_name":"Memory hierarchy","score":0.5715568661689758},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.510223388671875},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.4764963984489441},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.4396221339702606},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.43781882524490356},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.4234568774700165},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.3766249418258667},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3327634036540985},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.14632117748260498}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9139889478683472},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5739396214485168},{"id":"https://openalex.org/C2778100165","wikidata":"https://www.wikidata.org/wiki/Q1589327","display_name":"Memory hierarchy","level":3,"score":0.5715568661689758},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.510223388671875},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.4764963984489441},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.4396221339702606},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.43781882524490356},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.4234568774700165},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.3766249418258667},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3327634036540985},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.14632117748260498},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tkde.2020.3038109","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2020.3038109","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1886250870","display_name":null,"funder_award_id":"61972004","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4253300795","display_name":null,"funder_award_id":"61832001","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4890695131","display_name":null,"funder_award_id":"61702016","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5620609558","display_name":null,"funder_award_id":"U1936104","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7018988956","display_name":null,"funder_award_id":"61702015","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7748544612","display_name":null,"funder_award_id":"2020RC25","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":72,"referenced_works":["https://openalex.org/W778657980","https://openalex.org/W1788418780","https://openalex.org/W1806891645","https://openalex.org/W1869625623","https://openalex.org/W2029463628","https://openalex.org/W2032775418","https://openalex.org/W2060393849","https://openalex.org/W2074694452","https://openalex.org/W2083842231","https://openalex.org/W2088134616","https://openalex.org/W2094286023","https://openalex.org/W2096544401","https://openalex.org/W2097486709","https://openalex.org/W2103670492","https://openalex.org/W2115472805","https://openalex.org/W2141711298","https://openalex.org/W2153635508","https://openalex.org/W2163605009","https://openalex.org/W2171461858","https://openalex.org/W2255383145","https://openalex.org/W2295598076","https://openalex.org/W2335835108","https://openalex.org/W2474388053","https://openalex.org/W2475334473","https://openalex.org/W2529865518","https://openalex.org/W2585029434","https://openalex.org/W2604519798","https://openalex.org/W2604662567","https://openalex.org/W2612026221","https://openalex.org/W2618530766","https://openalex.org/W2696534605","https://openalex.org/W2740828438","https://openalex.org/W2782213427","https://openalex.org/W2788193959","https://openalex.org/W2805564340","https://openalex.org/W2808084211","https://openalex.org/W2888072858","https://openalex.org/W2892656258","https://openalex.org/W2899771611","https://openalex.org/W2906007643","https://openalex.org/W2929353582","https://openalex.org/W2945003059","https://openalex.org/W2948349252","https://openalex.org/W2952594493","https://openalex.org/W2953384591","https://openalex.org/W2962991166","https://openalex.org/W2963390885","https://openalex.org/W2963723139","https://openalex.org/W2972087877","https://openalex.org/W2972734853","https://openalex.org/W2998304172","https://openalex.org/W2999324038","https://openalex.org/W3016935098","https://openalex.org/W3023496898","https://openalex.org/W3029379969","https://openalex.org/W3035078899","https://openalex.org/W3037681773","https://openalex.org/W4289469122","https://openalex.org/W4295332771","https://openalex.org/W6622473587","https://openalex.org/W6683646410","https://openalex.org/W6684952740","https://openalex.org/W6691768974","https://openalex.org/W6713134421","https://openalex.org/W6721281333","https://openalex.org/W6741910194","https://openalex.org/W6747652377","https://openalex.org/W6748515141","https://openalex.org/W6755130838","https://openalex.org/W6756040250","https://openalex.org/W6758474236","https://openalex.org/W6764647576"],"related_works":["https://openalex.org/W2769189194","https://openalex.org/W1537323515","https://openalex.org/W2353852602","https://openalex.org/W2120249721","https://openalex.org/W2078036665","https://openalex.org/W778337757","https://openalex.org/W4293390906","https://openalex.org/W2735130281","https://openalex.org/W1990309876","https://openalex.org/W79990711"],"abstract_inverted_index":{"Wide":[0],"models":[1,6,9,72,90,106,171],"such":[2],"as":[3],"generalized":[4],"linear":[5],"and":[7,22,51,55,81,123,166,198,215,230,239,278,285,305],"factorization-based":[8],"have":[10,47],"been":[11],"extensively":[12],"used":[13],"in":[14,83,275],"various":[15],"predictive":[16],"applications,":[17],"e.g.,":[18],"recommendation,":[19],"CTR":[20],"prediction,":[21],"image":[23],"recognition.":[24],"Due":[25],"to":[26,46,78,172,192,222,244,264,271,281,295],"the":[27,32,34,41,66,70,79,95,104,111,139,149,164,175,195,208,266,301],"memory":[28,53,121,150,225],"bounded":[29],"property":[30],"of":[31,103,119,126,152,169,177,211],"models,":[33,142],"performance":[35,248],"improvement":[36],"on":[37],"CPU":[38,307],"is":[39,44,73],"reaching":[40],"limitation.":[42],"GPU":[43,67,112,180,224,303],"known":[45],"many":[48],"computation":[49,190],"units":[50],"high":[52,247],"bandwidth,":[54],"becomes":[56],"a":[57,156,188,246],"promising":[58],"platform":[59],"for":[60,69,110,138,160,203,227],"training":[61,68,101],"machine":[62],"learning":[63],"models.":[64,85],"However,":[65],"wide":[71,84,89,105,141,170],"far":[74],"from":[75,116,148],"optimal":[76],"due":[77],"sparsity":[80],"irregularity":[82],"The":[86,99],"existing":[87],"GPU-based":[88],"are":[91],"even":[92],"slower":[93],"than":[94,297,300],"ones":[96],"using":[97],"CPU.":[98],"classical":[100],"schema":[102,159,197],"does":[107],"not":[108],"optimized":[109],"architecture,":[113],"which":[114,162],"suffers":[115],"large":[117],"amount":[118,176],"random":[120],"accesses":[122],"redundant":[124],"read/write":[125],"intermediate":[127],"values.":[128],"In":[129],"this":[130],"paper,":[131],"we":[132,186,206,254],"propose":[133,256],"an":[134],"efficient":[135],"GPU-training":[136],"framework":[137],"large-scale":[140],"named":[143],"cuWide.":[144],"To":[145,183,250],"fully":[146],"benefit":[147],"hierarchy":[151],"GPU,":[153],"cuWide":[154,291],"applies":[155],"new":[157],"flow-based":[158,196],"training,":[161],"leverages":[163],"spatial":[165],"temporal":[167],"locality":[168],"drastically":[173],"reduce":[174,272],"communication":[178],"with":[179,218],"global":[181],"memory.":[182],"do":[184],"so,":[185],"adopt":[187],"bigraph":[189],"model":[191,237],"efficiently":[193,251],"realize":[194],"exploit":[199],"three":[200],"flexible":[201],"interfaces":[202],"programming.":[204],"Further,":[205],"use":[207],"2D":[209],"partition":[210],"mini-batch":[212],"(in":[213],"sample":[214],"feature":[216],"dimensions)":[217],"proposed":[219],"graph":[220],"abstraction":[221],"optimize":[223],"access":[226],"sparse":[228],"data,":[229],"apply":[231],"several":[232,257],"spatial-temporal":[233],"caching":[234,238,242],"mechanisms":[235],"(importance-based":[236],"cross-stage":[240],"accumulation":[241],"mechanisms)":[243],"achieve":[245],"kernel.":[249],"implement":[252],"cuWide,":[253],"also":[255],"GPU-oriented":[258],"optimizations,":[259],"including":[260],"feature-oriented":[261],"data":[262,267,283],"layout":[263],"enhance":[265],"locality,":[268],"replication":[269],"mechanism":[270],"update":[273],"conflicts":[274],"shared":[276],"memory,":[277],"multi-stream":[279],"scheduling":[280],"overlap":[282],"transferring":[284],"kernel":[286],"computing.":[287],"We":[288],"show":[289],"that":[290],"can":[292],"be":[293],"up":[294],"more":[296],"20\u00d7":[298],"faster":[299],"state-of-the-art":[302],"solutions":[304],"multi-core":[306],"solutions.":[308]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":8}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
