{"id":"https://openalex.org/W3204278478","doi":"https://doi.org/10.1145/3472456.3472464","title":"LoWino: Towards Efficient Low-Precision Winograd Convolutions on Modern CPUs","display_name":"LoWino: Towards Efficient Low-Precision Winograd Convolutions on Modern CPUs","publication_year":2021,"publication_date":"2021-08-09","ids":{"openalex":"https://openalex.org/W3204278478","doi":"https://doi.org/10.1145/3472456.3472464","mag":"3204278478"},"language":"en","primary_location":{"id":"doi:10.1145/3472456.3472464","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3472456.3472464","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3472456.3472464","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"50th International Conference on Parallel Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3472456.3472464","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100607669","display_name":"Guangli Li","orcid":"https://orcid.org/0000-0002-9738-261X"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Guangli Li","raw_affiliation_strings":["Institute of Computing Technology, Chinese Academy of Sciences and University of Chinese Academy of Sciences, China"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, Chinese Academy of Sciences and University of Chinese Academy of Sciences, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023640358","display_name":"Zhen Jia","orcid":"https://orcid.org/0000-0002-6810-2279"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhen Jia","raw_affiliation_strings":["Amazon, United States of America"],"affiliations":[{"raw_affiliation_string":"Amazon, United States of America","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053070701","display_name":"Xiaobing Feng","orcid":"https://orcid.org/0000-0003-2909-7750"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaobing Feng","raw_affiliation_strings":["Institute of Computing Technology, Chinese Academy of Sciences and University of Chinese Academy of Sciences, China"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, Chinese Academy of Sciences and University of Chinese Academy of Sciences, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I4210165038"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101638214","display_name":"Yida Wang","orcid":"https://orcid.org/0000-0001-8165-840X"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yida Wang","raw_affiliation_strings":["Amazon, United States of America"],"affiliations":[{"raw_affiliation_string":"Amazon, United States of America","institution_ids":["https://openalex.org/I1311688040"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100607669"],"corresponding_institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":0.8646,"has_fulltext":true,"cited_by_count":13,"citation_normalized_percentile":{"value":0.75923203,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"11"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11206","display_name":"Model Reduction and Neural Networks","score":0.9865999817848206,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.8251020908355713},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.80647873878479},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.6709882020950317},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.6514146327972412},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.5601626634597778},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5496689081192017},{"id":"https://openalex.org/keywords/xeon-phi","display_name":"Xeon Phi","score":0.5425065755844116},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5400441884994507},{"id":"https://openalex.org/keywords/xeon","display_name":"Xeon","score":0.5396040081977844},{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.47026047110557556},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4318719804286957},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.3870645761489868},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.23422753810882568},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.21347707509994507}],"concepts":[{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.8251020908355713},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.80647873878479},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.6709882020950317},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.6514146327972412},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.5601626634597778},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5496689081192017},{"id":"https://openalex.org/C96972482","wikidata":"https://www.wikidata.org/wiki/Q1049168","display_name":"Xeon Phi","level":2,"score":0.5425065755844116},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5400441884994507},{"id":"https://openalex.org/C145108525","wikidata":"https://www.wikidata.org/wiki/Q656154","display_name":"Xeon","level":2,"score":0.5396040081977844},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.47026047110557556},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4318719804286957},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3870645761489868},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.23422753810882568},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.21347707509994507},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3472456.3472464","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3472456.3472464","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3472456.3472464","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"50th International Conference on Parallel Processing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3472456.3472464","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3472456.3472464","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3472456.3472464","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"50th International Conference on Parallel Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3204278478.pdf","grobid_xml":"https://content.openalex.org/works/W3204278478.grobid-xml"},"referenced_works_count":37,"referenced_works":["https://openalex.org/W1487564550","https://openalex.org/W1686810756","https://openalex.org/W1901129140","https://openalex.org/W1922123711","https://openalex.org/W2065710348","https://openalex.org/W2097117768","https://openalex.org/W2108598243","https://openalex.org/W2123838014","https://openalex.org/W2163605009","https://openalex.org/W2172654076","https://openalex.org/W2194775991","https://openalex.org/W2319920447","https://openalex.org/W2540279855","https://openalex.org/W2582996697","https://openalex.org/W2618939455","https://openalex.org/W2734941459","https://openalex.org/W2786374423","https://openalex.org/W2787884921","https://openalex.org/W2809624076","https://openalex.org/W2890071283","https://openalex.org/W2950649068","https://openalex.org/W2962761403","https://openalex.org/W2963122961","https://openalex.org/W2963947383","https://openalex.org/W2964299589","https://openalex.org/W2966126880","https://openalex.org/W2970971581","https://openalex.org/W2997109118","https://openalex.org/W2998218113","https://openalex.org/W2998957070","https://openalex.org/W3008378296","https://openalex.org/W3015869703","https://openalex.org/W3018661492","https://openalex.org/W3036648875","https://openalex.org/W3098382995","https://openalex.org/W3123639931","https://openalex.org/W3139307480"],"related_works":["https://openalex.org/W2739740241","https://openalex.org/W1974923383","https://openalex.org/W947442053","https://openalex.org/W2085105049","https://openalex.org/W2475524688","https://openalex.org/W2526069705","https://openalex.org/W2024016913","https://openalex.org/W2019153376","https://openalex.org/W2981664121","https://openalex.org/W2796552083"],"abstract_inverted_index":{"Low-precision":[0],"computation,":[1,40],"which":[2,68],"has":[3],"been":[4],"widely":[5,29],"supported":[6],"in":[7,74,122,141],"contemporary":[8],"hardware,":[9],"is":[10,27],"considered":[11],"as":[12],"one":[13],"of":[14,102],"the":[15,43,75,80,100,142,147],"most":[16],"effective":[17],"methods":[18],"to":[19,31,42,78,134],"accelerate":[20],"convolutional":[21,120],"neural":[22,125],"networks.":[23,126],"However,":[24],"low-precision":[25,59,103],"computation":[26,104],"not":[28],"used":[30],"speed":[32],"up":[33,133],"Winograd,":[34],"an":[35,89],"algorithm":[36],"for":[37],"fast":[38],"convolution":[39,61],"due":[41],"numerical":[44],"error":[45],"introduced":[46],"by":[47,84,117],"combining":[48],"Winograd":[49,60,76],"transformation":[50],"and":[51],"quantization.":[52],"In":[53],"this":[54],"paper,":[55],"we":[56,87],"propose":[57],"a":[58,70,150],"approach,":[62],"LoWino,":[63],"based":[64],"on":[65,105,112],"post-training":[66],"quantization,":[67],"employs":[69],"linear":[71],"quantization":[72],"method":[73],"domain":[77],"reduce":[79],"precision":[81],"loss":[82],"caused":[83],"transformations.":[85],"Moreover,":[86],"present":[88],"efficient":[90],"implementation":[91],"that":[92,130],"integrates":[93],"well-designed":[94],"optimization":[95],"techniques,":[96],"thereby":[97],"adequately":[98],"exploiting":[99],"capability":[101],"modern":[106],"CPUs.":[107],"We":[108],"evaluate":[109],"our":[110],"approach":[111],"Intel":[113],"Xeon":[114],"Scalable":[115],"Processors":[116],"leveraging":[118],"representative":[119],"layers":[121],"prevailing":[123],"deep":[124],"Experimental":[127],"results":[128],"show":[129],"LoWino":[131],"achieves":[132],"2.04":[135],"\u00d7":[136],"speedup":[137],"over":[138],"state-of-the-art":[139],"implementations":[140],"vendor":[143],"library":[144],"while":[145],"maintaining":[146],"accuracy":[148],"at":[149],"reasonable":[151],"level.":[152]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
