{"id":"https://openalex.org/W3118616291","doi":"https://doi.org/10.1145/3437801.3441609","title":"I/O lower bounds for auto-tuning of convolutions in CNNs","display_name":"I/O lower bounds for auto-tuning of convolutions in CNNs","publication_year":2021,"publication_date":"2021-02-17","ids":{"openalex":"https://openalex.org/W3118616291","doi":"https://doi.org/10.1145/3437801.3441609","mag":"3118616291"},"language":"en","primary_location":{"id":"doi:10.1145/3437801.3441609","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3437801.3441609","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3437801.3441609","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 26th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3437801.3441609","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5054482220","display_name":"Xiaoyang Zhang","orcid":"https://orcid.org/0000-0001-8414-1449"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xiaoyang Zhang","raw_affiliation_strings":["University of Chinese Academy of Science"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Science","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010808858","display_name":"Junmin Xiao","orcid":"https://orcid.org/0000-0003-0457-4709"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junmin Xiao","raw_affiliation_strings":["University of Chinese Academy of Science"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Science","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5104072170","display_name":"Guangming Tan","orcid":null},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guangming Tan","raw_affiliation_strings":["University of Chinese Academy of Science"],"affiliations":[{"raw_affiliation_string":"University of Chinese Academy of Science","institution_ids":["https://openalex.org/I4210165038"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5054482220"],"corresponding_institution_ids":["https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":0.6798,"has_fulltext":true,"cited_by_count":10,"citation_normalized_percentile":{"value":0.70044907,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"247","last_page":"261"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/dataflow","display_name":"Dataflow","score":0.7654860019683838},{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.7298698425292969},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7102988958358765},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.5720629096031189},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5284996628761292},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.49820804595947266},{"id":"https://openalex.org/keywords/upper-and-lower-bounds","display_name":"Upper and lower bounds","score":0.4536149501800537},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.44004693627357483},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.4157434403896332},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.22619253396987915},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.18685448169708252},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.1385784149169922}],"concepts":[{"id":"https://openalex.org/C96324660","wikidata":"https://www.wikidata.org/wiki/Q205446","display_name":"Dataflow","level":2,"score":0.7654860019683838},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.7298698425292969},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7102988958358765},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.5720629096031189},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5284996628761292},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.49820804595947266},{"id":"https://openalex.org/C77553402","wikidata":"https://www.wikidata.org/wiki/Q13222579","display_name":"Upper and lower bounds","level":2,"score":0.4536149501800537},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.44004693627357483},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.4157434403896332},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.22619253396987915},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.18685448169708252},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.1385784149169922},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3437801.3441609","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3437801.3441609","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3437801.3441609","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 26th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3437801.3441609","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3437801.3441609","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3437801.3441609","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 26th ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1121271761","display_name":null,"funder_award_id":"Program","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1231421488","display_name":null,"funder_award_id":"under","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2672509926","display_name":null,"funder_award_id":"62032023, 61802369","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317612542","display_name":null,"funder_award_id":"2016YFB0200800","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3408156377","display_name":null,"funder_award_id":"2018AAA0103302, 2016YFC1401706, 2016YFB0200800","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G37568934","display_name":null,"funder_award_id":"Grant","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G384178317","display_name":null,"funder_award_id":"02008","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4588672186","display_name":null,"funder_award_id":"2016YFB0200800","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G5832729736","display_name":null,"funder_award_id":"62032023","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6721297356","display_name":null,"funder_award_id":"61972377","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7726157001","display_name":null,"funder_award_id":"Grant No.","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8114646031","display_name":null,"funder_award_id":"2016Y","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G8286755146","display_name":null,"funder_award_id":"61802369","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8372748291","display_name":null,"funder_award_id":"2018AAA0103302","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G850960841","display_name":null,"funder_award_id":"2016YFC","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322183","display_name":"Huawei Technologies","ror":"https://ror.org/00cmhce21"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3118616291.pdf","grobid_xml":"https://content.openalex.org/works/W3118616291.grobid-xml"},"referenced_works_count":36,"referenced_works":["https://openalex.org/W1526545853","https://openalex.org/W1564514837","https://openalex.org/W1667652561","https://openalex.org/W1943732115","https://openalex.org/W1972501001","https://openalex.org/W2038142281","https://openalex.org/W2104373803","https://openalex.org/W2107651134","https://openalex.org/W2117696986","https://openalex.org/W2128539477","https://openalex.org/W2138266087","https://openalex.org/W2157237396","https://openalex.org/W2164946224","https://openalex.org/W2274287116","https://openalex.org/W2279098554","https://openalex.org/W2295598076","https://openalex.org/W2442974303","https://openalex.org/W2570467259","https://openalex.org/W2612445135","https://openalex.org/W2766839578","https://openalex.org/W2788183263","https://openalex.org/W2797660040","https://openalex.org/W2807127337","https://openalex.org/W2886115771","https://openalex.org/W2949251082","https://openalex.org/W2962835968","https://openalex.org/W2963125010","https://openalex.org/W2965408806","https://openalex.org/W2984305089","https://openalex.org/W3013192691","https://openalex.org/W3017059173","https://openalex.org/W3041813360","https://openalex.org/W3100417409","https://openalex.org/W3102476541","https://openalex.org/W3104745751","https://openalex.org/W4243261006"],"related_works":["https://openalex.org/W2293118914","https://openalex.org/W2998381397","https://openalex.org/W4236419692","https://openalex.org/W2171015181","https://openalex.org/W3167919718","https://openalex.org/W4251718783","https://openalex.org/W4239447582","https://openalex.org/W1484403103","https://openalex.org/W2807127337","https://openalex.org/W2964954556"],"abstract_inverted_index":{"Convolution":[0],"is":[1,189],"the":[2,7,25,30,33,38,59,62,71,77,101,106,120,128,146,152,160,168,173,198,208,213,228,240,260,277,290],"most":[3],"time-consuming":[4],"part":[5],"in":[6,19,32,64,117,164,219],"computation":[8],"of":[9,35,70,95,133,170,172,182,210,215],"convolutional":[10],"neural":[11],"networks":[12],"(CNNs),":[13],"which":[14,93,126,258],"have":[15],"achieved":[16],"great":[17],"successes":[18],"numerous":[20],"practical":[21],"applications.":[22],"Due":[23],"to":[24,57,166,191],"complex":[26],"data":[27,45,107,161],"dependency":[28],"and":[29,55,123,130,201,212,231],"increase":[31],"amount":[34],"model":[36],"samples,":[37],"convolution":[39,63,115,122,135,155,200,230],"suffers":[40],"from":[41,139],"high":[42],"overhead":[43],"on":[44,100,185,204,227,249,271],"movement":[46,108],"(i.e.,":[47],"memory":[48,217],"access).":[49],"This":[50],"work":[51],"provides":[52],"comprehensive":[53],"analysis":[54,69],"methodologies":[56],"minimize":[58],"communication":[60],"for":[61,89,112,151,197,263],"CNNs.":[65],"With":[66],"an":[67,179,193],"in-depth":[68],"recent":[72],"I/O":[73,85,140,186,272],"complexity":[74],"theory":[75,88],"under":[76],"red-blue":[78],"game":[79],"model,":[80],"we":[81,104,144],"develop":[82],"a":[83,90,134],"general":[84],"lower":[86,109,141,187,273],"bound":[87,110,142],"composite":[91],"algorithm":[92,203,233],"consists":[94],"several":[96],"different":[97],"sub-computations.":[98],"Based":[99],"proposed":[102,190],"theory,":[103],"establish":[105],"results":[111,226],"two":[113,153],"main":[114,154],"algorithms":[116,156],"CNNs,":[118],"namely":[119],"direct":[121,129,199,229],"Winograd":[124,202,232],"algorithm,":[125],"represents":[127,259],"indirect":[131],"implementations":[132],"respectively.":[136],"Next,":[137],"derived":[138],"results,":[143],"design":[145,181],"near":[147,174],"I/O-optimal":[148,175],"dataflow":[149,176,237],"strategies":[150,177,238],"by":[157,294],"fully":[158],"exploiting":[159],"reuse.":[162],"Furthermore,":[163],"order":[165],"push":[167],"envelope":[169],"performance":[171,247,288],"further,":[178],"aggressive":[180],"auto-tuning":[183,241,268],"based":[184,270],"bounds,":[188],"search":[192],"optimal":[194,278,291],"parameter":[195,279],"configuration":[196,280],"GPU,":[205],"such":[206],"as":[207],"number":[209],"threads":[211],"size":[214],"shared":[216],"used":[218],"each":[220],"thread":[221],"block.":[222],"Finally,":[223],"experiment":[224],"evaluation":[225],"show":[234],"that":[235],"our":[236,267,284],"with":[239,256],"approach":[242],"can":[243,275],"achieve":[244],"about":[245],"3.32\u00d7":[246],"speedup":[248],"average":[250],"over":[251],"cuDNN.":[252],"In":[253],"addition,":[254],"compared":[255],"TVM,":[257],"state-of-the-art":[261],"technique":[262],"auto-tuning,":[264],"not":[265],"only":[266],"method":[269],"bounds":[274],"find":[276],"faster,":[281],"but":[282],"also":[283],"solution":[285,292],"has":[286],"higher":[287],"than":[289],"provided":[293],"TVM.":[295]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2}],"updated_date":"2026-04-18T07:56:08.524223","created_date":"2025-10-10T00:00:00"}
