{"id":"https://openalex.org/W4304140699","doi":"https://doi.org/10.1109/socc56010.2022.9908120","title":"Cache-locality Based Adaptive Warp Scheduling for Neural Network Acceleration on GPGPUs","display_name":"Cache-locality Based Adaptive Warp Scheduling for Neural Network Acceleration on GPGPUs","publication_year":2022,"publication_date":"2022-09-05","ids":{"openalex":"https://openalex.org/W4304140699","doi":"https://doi.org/10.1109/socc56010.2022.9908120"},"language":"en","primary_location":{"id":"doi:10.1109/socc56010.2022.9908120","is_oa":false,"landing_page_url":"https://doi.org/10.1109/socc56010.2022.9908120","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE 35th International System-on-Chip Conference (SOCC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5079217067","display_name":"Weiming Hu","orcid":"https://orcid.org/0000-0003-4501-1435"},"institutions":[{"id":"https://openalex.org/I30809798","display_name":"ShanghaiTech University","ror":"https://ror.org/030bhh786","country_code":"CN","type":"education","lineage":["https://openalex.org/I30809798"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Weiming Hu","raw_affiliation_strings":["School of Information Science and Technology, ShanghaiTech University,Shanghai,China","School of Information Science and Technology, ShanghaiTech University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Information Science and Technology, ShanghaiTech University,Shanghai,China","institution_ids":["https://openalex.org/I30809798"]},{"raw_affiliation_string":"School of Information Science and Technology, ShanghaiTech University, Shanghai, China","institution_ids":["https://openalex.org/I30809798"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101854123","display_name":"Yi Zhou","orcid":"https://orcid.org/0009-0004-8877-5173"},"institutions":[{"id":"https://openalex.org/I4210161025","display_name":"Jordan Valley Semiconductors (China)","ror":"https://ror.org/05516nt33","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210149196","https://openalex.org/I4210161025"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi Zhou","raw_affiliation_strings":["Glenfly Tech Co., Ltd.,Shanghai,China","Glenfly Tech Co., Ltd., Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Glenfly Tech Co., Ltd.,Shanghai,China","institution_ids":["https://openalex.org/I4210161025"]},{"raw_affiliation_string":"Glenfly Tech Co., Ltd., Shanghai, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112751228","display_name":"Ying Quan","orcid":"https://orcid.org/0009-0008-3323-7123"},"institutions":[{"id":"https://openalex.org/I4210161025","display_name":"Jordan Valley Semiconductors (China)","ror":"https://ror.org/05516nt33","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210149196","https://openalex.org/I4210161025"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ying Quan","raw_affiliation_strings":["Glenfly Tech Co., Ltd.,Shanghai,China","Glenfly Tech Co., Ltd., Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Glenfly Tech Co., Ltd.,Shanghai,China","institution_ids":["https://openalex.org/I4210161025"]},{"raw_affiliation_string":"Glenfly Tech Co., Ltd., Shanghai, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100635361","display_name":"Yuanfeng Wang","orcid":"https://orcid.org/0000-0001-7573-9895"},"institutions":[{"id":"https://openalex.org/I4210161025","display_name":"Jordan Valley Semiconductors (China)","ror":"https://ror.org/05516nt33","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210149196","https://openalex.org/I4210161025"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuanfeng Wang","raw_affiliation_strings":["Glenfly Tech Co., Ltd.,Shanghai,China","Glenfly Tech Co., Ltd., Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Glenfly Tech Co., Ltd.,Shanghai,China","institution_ids":["https://openalex.org/I4210161025"]},{"raw_affiliation_string":"Glenfly Tech Co., Ltd., Shanghai, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5063714836","display_name":"Xin Lou","orcid":"https://orcid.org/0000-0002-8499-5038"},"institutions":[{"id":"https://openalex.org/I30809798","display_name":"ShanghaiTech University","ror":"https://ror.org/030bhh786","country_code":"CN","type":"education","lineage":["https://openalex.org/I30809798"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xin Lou","raw_affiliation_strings":["School of Information Science and Technology, ShanghaiTech University,Shanghai,China","School of Information Science and Technology, ShanghaiTech University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Information Science and Technology, ShanghaiTech University,Shanghai,China","institution_ids":["https://openalex.org/I30809798"]},{"raw_affiliation_string":"School of Information Science and Technology, ShanghaiTech University, Shanghai, China","institution_ids":["https://openalex.org/I30809798"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5079217067"],"corresponding_institution_ids":["https://openalex.org/I30809798"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.09450418,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"25","issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9180664420127869},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7597476243972778},{"id":"https://openalex.org/keywords/locality-of-reference","display_name":"Locality of reference","score":0.7127121686935425},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.6125469207763672},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.5997958183288574},{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.5929219126701355},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.5298799276351929},{"id":"https://openalex.org/keywords/workload","display_name":"Workload","score":0.4968600571155548},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.15335074067115784}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9180664420127869},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7597476243972778},{"id":"https://openalex.org/C27602214","wikidata":"https://www.wikidata.org/wiki/Q1868547","display_name":"Locality of reference","level":3,"score":0.7127121686935425},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.6125469207763672},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.5997958183288574},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.5929219126701355},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.5298799276351929},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.4968600571155548},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.15335074067115784},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/socc56010.2022.9908120","is_oa":false,"landing_page_url":"https://doi.org/10.1109/socc56010.2022.9908120","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE 35th International System-on-Chip Conference (SOCC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W599934088","https://openalex.org/W1968775993","https://openalex.org/W1979527452","https://openalex.org/W1983235612","https://openalex.org/W1984222112","https://openalex.org/W2047060659","https://openalex.org/W2080592089","https://openalex.org/W2081583983","https://openalex.org/W2090584832","https://openalex.org/W2098505406","https://openalex.org/W2128120785","https://openalex.org/W2142444503","https://openalex.org/W2155503253","https://openalex.org/W2163605009","https://openalex.org/W2194775991","https://openalex.org/W2273440736","https://openalex.org/W2285660444","https://openalex.org/W2289252105","https://openalex.org/W2328929289","https://openalex.org/W2565960208","https://openalex.org/W2911720734","https://openalex.org/W2937249639","https://openalex.org/W3118608800","https://openalex.org/W6684191040","https://openalex.org/W6694513646","https://openalex.org/W6758346439","https://openalex.org/W6787972765"],"related_works":["https://openalex.org/W1555349535","https://openalex.org/W2583128298","https://openalex.org/W2053359564","https://openalex.org/W2161159383","https://openalex.org/W1495260638","https://openalex.org/W1511204342","https://openalex.org/W2369125128","https://openalex.org/W2369223577","https://openalex.org/W2010020348","https://openalex.org/W1982580072"],"abstract_inverted_index":{"In":[0,53],"many":[1],"emerging":[2],"applications":[3],"such":[4],"as":[5,19],"convolutional":[6],"neural":[7],"networks":[8],"(CNNs),":[9],"general":[10],"purpose":[11],"graph":[12],"processing":[13],"units":[14],"(GPGPUs)":[15],"are":[16],"widely":[17],"used":[18],"computing":[20],"devices.":[21],"For":[22],"GPGPU":[23],"computing,":[24],"warp":[25,39,111,138],"scheduling":[26,40,84,117,169],"policy":[27,41,118],"is":[28,68,132,171],"crucial":[29],"for":[30,46,150],"the":[31,58,97,102,110,115,129,136,151,163,167],"overall":[32],"performance.":[33],"We":[34,159],"find":[35],"that":[36,70,128,162],"a":[37,50,74,78,87,91],"single":[38],"cannot":[42],"provide":[43],"optimal":[44],"performance":[45,80,149],"all":[47],"layers":[48,72],"in":[49,63],"CNN":[51,65,155],"model.":[52],"this":[54],"paper,":[55],"we":[56,89],"analyze":[57],"workload":[59,98],"of":[60,73,104,153],"each":[61],"layer":[62],"typical":[64],"models.":[66],"It":[67],"observed":[69],"multiple":[71],"network":[75],"model":[76],"have":[77],"significant":[79],"gap":[81],"under":[82],"different":[83],"policies.":[85],"As":[86],"result,":[88],"propose":[90],"cache":[92,105],"tag":[93],"buffer":[94],"to":[95,101,134,147],"evaluate":[96],"characteristics":[99],"according":[100],"type":[103],"locality.":[106],"Based":[107],"on":[108,157],"that,":[109],"scheduler":[112,139],"adaptively":[113],"selects":[114],"proper":[116],"among":[119],"Loosely-Round-Robin":[120],"(LRR)":[121],"and":[122,142],"Greedy-Then-Oldest":[123],"(GTO).":[124],"Evaluation":[125],"results":[126],"show":[127,161],"proposed":[130,168],"mechanism":[131],"able":[133],"select":[135],"better":[137],"between":[140],"LRR":[141],"GTO":[143],"at":[144],"runtime,":[145],"translating":[146],"superior":[148],"computation":[152],"various":[154],"models":[156],"GPGPUs.":[158],"also":[160],"overhead":[164],"introduced":[165],"by":[166],"method":[170],"small.":[172]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
