{"id":"https://openalex.org/W4388763336","doi":"https://doi.org/10.1145/3632956","title":"Fast Convolution Meets Low Precision: Exploring Efficient Quantized Winograd Convolution on Modern CPUs","display_name":"Fast Convolution Meets Low Precision: Exploring Efficient Quantized Winograd Convolution on Modern CPUs","publication_year":2023,"publication_date":"2023-11-17","ids":{"openalex":"https://openalex.org/W4388763336","doi":"https://doi.org/10.1145/3632956"},"language":"en","primary_location":{"id":"doi:10.1145/3632956","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3632956","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3632956","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3632956","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100425338","display_name":"Xueying Wang","orcid":"https://orcid.org/0000-0002-7835-113X"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xueying Wang","raw_affiliation_strings":["Beijing University of Posts and Telecommunications, China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications, China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100607669","display_name":"Guangli Li","orcid":"https://orcid.org/0000-0002-9738-261X"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guangli Li","raw_affiliation_strings":["Institute of Computing Technology, Chinese Academy of Sciences and University of Chinese Academy of Sciences, China"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, Chinese Academy of Sciences and University of Chinese Academy of Sciences, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102854276","display_name":"Zhen Jia","orcid":"https://orcid.org/0000-0003-3543-2324"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhen Jia","raw_affiliation_strings":["Amazon Web Services, USA"],"affiliations":[{"raw_affiliation_string":"Amazon Web Services, USA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053070701","display_name":"Xiaobing Feng","orcid":"https://orcid.org/0000-0003-2909-7750"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaobing Feng","raw_affiliation_strings":["Institute of Computing Technology, Chinese Academy of Sciences and University of Chinese Academy of Sciences, China"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, Chinese Academy of Sciences and University of Chinese Academy of Sciences, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I4210165038"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101638214","display_name":"Yida Wang","orcid":"https://orcid.org/0000-0001-8165-840X"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yida Wang","raw_affiliation_strings":["Amazon Web Services, USA"],"affiliations":[{"raw_affiliation_string":"Amazon Web Services, USA","institution_ids":["https://openalex.org/I1311688040"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100425338"],"corresponding_institution_ids":["https://openalex.org/I139759216"],"apc_list":null,"apc_paid":null,"fwci":0.2377,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.53975234,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":"21","issue":"1","first_page":"1","last_page":"26"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9900000095367432,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7643191814422607},{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.7577216625213623},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5357649922370911},{"id":"https://openalex.org/keywords/overlap\u2013add-method","display_name":"Overlap\u2013add method","score":0.49857544898986816},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4193279445171356},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.3812931478023529},{"id":"https://openalex.org/keywords/arithmetic","display_name":"Arithmetic","score":0.3687945604324341},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.22657933831214905},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.20770159363746643},{"id":"https://openalex.org/keywords/fourier-transform","display_name":"Fourier transform","score":0.13252803683280945},{"id":"https://openalex.org/keywords/mathematical-analysis","display_name":"Mathematical analysis","score":0.08251982927322388}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7643191814422607},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.7577216625213623},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5357649922370911},{"id":"https://openalex.org/C181002996","wikidata":"https://www.wikidata.org/wiki/Q1611641","display_name":"Overlap\u2013add method","level":5,"score":0.49857544898986816},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4193279445171356},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.3812931478023529},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.3687945604324341},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.22657933831214905},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.20770159363746643},{"id":"https://openalex.org/C102519508","wikidata":"https://www.wikidata.org/wiki/Q6520159","display_name":"Fourier transform","level":2,"score":0.13252803683280945},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.08251982927322388},{"id":"https://openalex.org/C203024314","wikidata":"https://www.wikidata.org/wiki/Q1365258","display_name":"Fourier analysis","level":3,"score":0.0},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.0},{"id":"https://openalex.org/C76563020","wikidata":"https://www.wikidata.org/wiki/Q4817582","display_name":"Fractional Fourier transform","level":4,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3632956","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3632956","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3632956","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3632956","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3632956","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3632956","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1121271761","display_name":null,"funder_award_id":"Program","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1989434931","display_name":null,"funder_award_id":"2023M733566","funder_id":"https://openalex.org/F4320321543","funder_display_name":"China Postdoctoral Science Foundation"},{"id":"https://openalex.org/G2082826544","display_name":null,"funder_award_id":"Postdoctoral","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2433750033","display_name":null,"funder_award_id":"62090024, 62232015, and 62302479","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2674912097","display_name":null,"funder_award_id":"6209002","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2942767815","display_name":null,"funder_award_id":"2021ZD0110101","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G391238517","display_name":null,"funder_award_id":", and","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4478266629","display_name":null,"funder_award_id":"62090024","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4967579892","display_name":null,"funder_award_id":"E361010","funder_id":"https://openalex.org/F4320335561","funder_display_name":"Institute of Computing Technology, Chinese Academy of Sciences"},{"id":"https://openalex.org/G5030940285","display_name":null,"funder_award_id":"2023M","funder_id":"https://openalex.org/F4320321543","funder_display_name":"China Postdoctoral Science Foundation"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G68136634","display_name":null,"funder_award_id":"2090024","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7223888118","display_name":null,"funder_award_id":"62302479","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7861041779","display_name":null,"funder_award_id":"6230247","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8136545603","display_name":null,"funder_award_id":"62232015","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8513333314","display_name":null,"funder_award_id":"2021ZD01","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8744364856","display_name":null,"funder_award_id":"110101","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321543","display_name":"China Postdoctoral Science Foundation","ror":"https://ror.org/0426zh255"},{"id":"https://openalex.org/F4320335561","display_name":"Institute of Computing Technology, Chinese Academy of Sciences","ror":null},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4388763336.pdf","grobid_xml":"https://content.openalex.org/works/W4388763336.grobid-xml"},"referenced_works_count":34,"referenced_works":["https://openalex.org/W1487564550","https://openalex.org/W1581253957","https://openalex.org/W2119144962","https://openalex.org/W2163605009","https://openalex.org/W2319920447","https://openalex.org/W2558687840","https://openalex.org/W2582996697","https://openalex.org/W2585550685","https://openalex.org/W2618939455","https://openalex.org/W2750822049","https://openalex.org/W2751343396","https://openalex.org/W2787884921","https://openalex.org/W2809624076","https://openalex.org/W2886014761","https://openalex.org/W2890887549","https://openalex.org/W2913114028","https://openalex.org/W2914237411","https://openalex.org/W2921118685","https://openalex.org/W2949833565","https://openalex.org/W2950649068","https://openalex.org/W2970958999","https://openalex.org/W3008378296","https://openalex.org/W3098382995","https://openalex.org/W3138433673","https://openalex.org/W3138811091","https://openalex.org/W4212774754","https://openalex.org/W4236965008","https://openalex.org/W4248927948","https://openalex.org/W4293584584","https://openalex.org/W4300906944","https://openalex.org/W4301409532","https://openalex.org/W4385342469","https://openalex.org/W6750227808","https://openalex.org/W6751609549"],"related_works":["https://openalex.org/W4372260258","https://openalex.org/W2267589039","https://openalex.org/W2369791303","https://openalex.org/W2759540840","https://openalex.org/W2133280289","https://openalex.org/W2360069155","https://openalex.org/W2169963286","https://openalex.org/W4254230825","https://openalex.org/W2919798019","https://openalex.org/W2293685972"],"abstract_inverted_index":{"Low-precision":[0],"computation":[1,33,101],"has":[2,17,34],"emerged":[3],"as":[4,43],"one":[5],"of":[6,99,134],"the":[7,44,70,75,97,144],"most":[8],"effective":[9,57],"techniques":[10],"for":[11],"accelerating":[12,28],"convolutional":[13,29,117],"neural":[14,30,120],"networks":[15],"and":[16,119,136],"garnered":[18],"widespread":[19],"support":[20],"on":[21,102,108],"modern":[22,103],"hardware.":[23],"Despite":[24],"its":[25],"effectiveness":[26],"in":[27,69,143],"networks,":[31],"low-precision":[32,100],"not":[35],"been":[36],"commonly":[37],"applied":[38],"to":[39,48,73,94],"fast":[40],"convolutions,":[41],"such":[42],"Winograd":[45,59,71],"algorithm,":[46],"due":[47],"numerical":[49],"issues.":[50],"In":[51],"this":[52],"article,":[53],"we":[54,82],"propose":[55],"an":[56,65,84,132],"quantized":[58],"convolution,":[60],"named":[61],"LoWino,":[62],"which":[63],"employs":[64],"in-side":[66],"quantization":[67],"method":[68],"domain":[72],"reduce":[74],"precision":[76],"loss":[77,150],"caused":[78],"by":[79],"transformations.":[80],"Meanwhile,":[81],"present":[83],"efficient":[85],"implementation":[86],"that":[87,127],"integrates":[88],"well-designed":[89],"optimization":[90],"techniques,":[91],"allowing":[92],"us":[93],"fully":[95],"exploit":[96],"capabilities":[98],"CPUs.":[104],"We":[105],"evaluate":[106],"LoWino":[107],"two":[109],"Intel":[110],"Xeon":[111],"Scalable":[112],"Processor":[113],"platforms":[114],"with":[115],"representative":[116],"layers":[118],"network":[121],"models.":[122],"The":[123],"experimental":[124],"results":[125],"demonstrate":[126],"our":[128],"approach":[129],"can":[130],"achieve":[131],"average":[133],"1.84\u00d7":[135],"1.91\u00d7":[137],"operator":[138],"speedups":[139],"over":[140],"state-of-the-art":[141],"implementations":[142],"vendor":[145],"library":[146],"while":[147],"preserving":[148],"accuracy":[149],"at":[151],"a":[152],"reasonable":[153],"level.":[154]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-04-16T08:26:57.006410","created_date":"2025-10-10T00:00:00"}
