{"id":"https://openalex.org/W7137985092","doi":"https://doi.org/10.1609/aaai.v40i26.39288","title":"AirWino: Optimized Winograd Convolution for Accelerating CNN Inference on ARMv8 Processors","display_name":"AirWino: Optimized Winograd Convolution for Accelerating CNN Inference on ARMv8 Processors","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7137985092","doi":"https://doi.org/10.1609/aaai.v40i26.39288"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i26.39288","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i26.39288","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39288/43249","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39288/43249","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5066618963","display_name":"Heng Gui","orcid":"https://orcid.org/0000-0002-0946-1589"},"institutions":[{"id":"https://openalex.org/I4210128818","display_name":"Institute of Software","ror":"https://ror.org/033dfsn42","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Haoyuan Gui","raw_affiliation_strings":["University of the Chinese Academy of Sciences,\nInstitute of Software, Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"University of the Chinese Academy of Sciences,\nInstitute of Software, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210128818"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129687557","display_name":"Xiaoyu Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128818","display_name":"Institute of Software","ror":"https://ror.org/033dfsn42","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoyu Zhang","raw_affiliation_strings":["University of the Chinese Academy of Sciences,\nInstitute of Software, Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"University of the Chinese Academy of Sciences,\nInstitute of Software, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210128818"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129725849","display_name":"Yifan Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128818","display_name":"Institute of Software","ror":"https://ror.org/033dfsn42","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yifan Zhang","raw_affiliation_strings":["University of the Chinese Academy of Sciences,\nInstitute of Software, Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"University of the Chinese Academy of Sciences,\nInstitute of Software, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210128818"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129730812","display_name":"Ximeng Fu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128818","display_name":"Institute of Software","ror":"https://ror.org/033dfsn42","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ximeng Fu","raw_affiliation_strings":["University of the Chinese Academy of Sciences,\nInstitute of Software, Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"University of the Chinese Academy of Sciences,\nInstitute of Software, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210128818"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102965956","display_name":"Shiqi Sun","orcid":"https://orcid.org/0000-0002-8483-0110"},"institutions":[{"id":"https://openalex.org/I4210128818","display_name":"Institute of Software","ror":"https://ror.org/033dfsn42","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shiqi Sun","raw_affiliation_strings":["University of the Chinese Academy of Sciences,\nInstitute of Software, Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"University of the Chinese Academy of Sciences,\nInstitute of Software, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210128818"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047616850","display_name":"L Li","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128818","display_name":"Institute of Software","ror":"https://ror.org/033dfsn42","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Leisheng Li","raw_affiliation_strings":["Institute of Software, Chinese Academy of Sciences,\nKey Laboratory of System Software, Institute of Software, Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Institute of Software, Chinese Academy of Sciences,\nKey Laboratory of System Software, Institute of Software, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210128818"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100637653","display_name":"Huiyuan Li","orcid":"https://orcid.org/0000-0001-6789-8693"},"institutions":[{"id":"https://openalex.org/I4210128818","display_name":"Institute of Software","ror":"https://ror.org/033dfsn42","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huiyuan Li","raw_affiliation_strings":["Institute of Software, Chinese Academy of Sciences,\nKey Laboratory of System Software, Institute of Software, Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Institute of Software, Chinese Academy of Sciences,\nKey Laboratory of System Software, Institute of Software, Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210128818"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5066618963"],"corresponding_institution_ids":["https://openalex.org/I4210128818"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.25297619,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"26","first_page":"21414","last_page":"21422"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.6531999707221985,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.6531999707221985,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.03350000083446503,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.025200000032782555,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.7483000159263611},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.6105999946594238},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.48899999260902405},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.48890000581741333},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.47699999809265137},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.46160000562667847},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.45419999957084656},{"id":"https://openalex.org/keywords/transformation","display_name":"Transformation (genetics)","score":0.4327999949455261}],"concepts":[{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.7483000159263611},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7307999730110168},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.6105999946594238},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.49639999866485596},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.48899999260902405},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.48890000581741333},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.47699999809265137},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.46160000562667847},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.45419999957084656},{"id":"https://openalex.org/C204241405","wikidata":"https://www.wikidata.org/wiki/Q461499","display_name":"Transformation (genetics)","level":3,"score":0.4327999949455261},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4300000071525574},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.4239000082015991},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.3659000098705292},{"id":"https://openalex.org/C157899210","wikidata":"https://www.wikidata.org/wiki/Q1395022","display_name":"Convolutional code","level":3,"score":0.3424000144004822},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3352000117301941},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.325300008058548},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3237999975681305},{"id":"https://openalex.org/C186967261","wikidata":"https://www.wikidata.org/wiki/Q5082128","display_name":"Mobile device","level":2,"score":0.32359999418258667},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.31700000166893005},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.31150001287460327},{"id":"https://openalex.org/C3826847","wikidata":"https://www.wikidata.org/wiki/Q188768","display_name":"FLOPS","level":2,"score":0.3100000023841858},{"id":"https://openalex.org/C2777472644","wikidata":"https://www.wikidata.org/wiki/Q16968992","display_name":"Approximate inference","level":3,"score":0.28040000796318054},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.2799000144004822},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.2766999900341034},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.265500009059906},{"id":"https://openalex.org/C84462506","wikidata":"https://www.wikidata.org/wiki/Q173142","display_name":"Digital signal processing","level":2,"score":0.2653000056743622},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.2635999917984009},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.25429999828338623},{"id":"https://openalex.org/C181002996","wikidata":"https://www.wikidata.org/wiki/Q1611641","display_name":"Overlap\u2013add method","level":5,"score":0.2531000077724457}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i26.39288","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i26.39288","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39288/43249","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i26.39288","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i26.39288","pdf_url":"https://ojs.aaai.org/index.php/AAAI/article/download/39288/43249","source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1058664010","display_name":null,"funder_award_id":"12471348","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1231421488","display_name":null,"funder_award_id":"under","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4020255992","display_name":null,"funder_award_id":"Project","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5161677271","display_name":null,"funder_award_id":"12471","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7726157001","display_name":null,"funder_award_id":"Grant No.","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7137985092.pdf","grobid_xml":"https://content.openalex.org/works/W7137985092.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"As":[0],"Convolutional":[1],"Neural":[2],"Networks":[3],"(CNNs)":[4],"continue":[5],"to":[6,20],"gain":[7],"traction":[8],"in":[9,30,50],"deep":[10],"learning,":[11],"Winograd":[12,40,64,100],"convolution":[13,41,65],"has":[14],"emerged":[15],"as":[16],"a":[17,81,116],"key":[18],"algorithm":[19],"enhance":[21],"computational":[22,53],"efficiency.":[23],"Although":[24],"ARM-based":[25],"CPUs":[26],"are":[27],"increasingly":[28],"prevalent":[29],"mobile":[31],"devices,":[32],"embedded":[33],"systems":[34],"and":[35,55,92,98,105,113,138,144],"HPC":[36],"servers,":[37],"existing":[38],"2D":[39,97],"implementations":[42,66],"for":[43,48,67,95],"ARM":[44,68,125],"often":[45],"leave":[46],"room":[47],"improvement":[49],"transformation":[51],"efficiency,":[52],"throughput,":[54],"overall":[56],"versatility.":[57],"Furthermore,":[58],"the":[59,72],"lack":[60],"of":[61,75,83,111,119],"tailored":[62],"3D":[63,99],"architectures":[69],"stems":[70],"from":[71],"additional":[73],"complexity":[74],"supporting":[76],"higher-dimensional":[77],"kernels.":[78],"AirWino":[79,129],"introduces":[80],"set":[82],"novel":[84],"optimizations":[85],"covering":[86],"transformations,":[87],"data":[88],"layouts,":[89],"micro-kernel":[90],"computations,":[91],"parallelization":[93],"strategies":[94],"both":[96],"convolution.":[101],"It":[102],"supports":[103],"FP32":[104],"FP16":[106],"precisions":[107],"with":[108],"filter":[109],"sizes":[110],"3":[112],"5,":[114],"targeting":[115],"broad":[117],"range":[118],"applications.":[120],"Evaluations":[121],"on":[122],"four":[123],"distinct":[124],"platforms":[126],"show":[127],"that":[128],"consistently":[130],"outperforms":[131],"state-of-the-art":[132],"libraries":[133],"across":[134],"various":[135],"experimental":[136],"scenarios":[137],"hardware":[139],"configurations,":[140],"highlighting":[141],"its":[142],"efficiency":[143],"portability.":[145]},"counts_by_year":[],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2026-03-18T00:00:00"}
