{"id":"https://openalex.org/W4210559098","doi":"https://doi.org/10.1109/tpds.2022.3146257","title":"Automatic Generation of High-Performance Convolution Kernels on ARM CPUs for Deep Learning","display_name":"Automatic Generation of High-Performance Convolution Kernels on ARM CPUs for Deep Learning","publication_year":2022,"publication_date":"2022-01-27","ids":{"openalex":"https://openalex.org/W4210559098","doi":"https://doi.org/10.1109/tpds.2022.3146257"},"language":"en","primary_location":{"id":"doi:10.1109/tpds.2022.3146257","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2022.3146257","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://www.osti.gov/servlets/purl/1863284","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056275783","display_name":"Jintao Meng","orcid":"https://orcid.org/0000-0002-6208-4102"},"institutions":[{"id":"https://openalex.org/I4210145761","display_name":"Shenzhen Institutes of Advanced Technology","ror":"https://ror.org/04gh4er46","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210145761"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jintao Meng","raw_affiliation_strings":["Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Shenzhen, Guangdong, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Shenzhen, Guangdong, China","institution_ids":["https://openalex.org/I4210145761"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101520148","display_name":"Zhuang Chen","orcid":"https://orcid.org/0000-0003-0691-894X"},"institutions":[{"id":"https://openalex.org/I4210145761","display_name":"Shenzhen Institutes of Advanced Technology","ror":"https://ror.org/04gh4er46","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210145761"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chen Zhuang","raw_affiliation_strings":["Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Shenzhen, Guangdong, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Shenzhen, Guangdong, China","institution_ids":["https://openalex.org/I4210145761"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100338479","display_name":"Peng Chen","orcid":"https://orcid.org/0000-0003-1244-3151"},"institutions":[{"id":"https://openalex.org/I4210129730","display_name":"RIKEN Center for Computational Science","ror":"https://ror.org/03r519674","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652","https://openalex.org/I4210129730"]},{"id":"https://openalex.org/I73613424","display_name":"National Institute of Advanced Industrial Science and Technology","ror":"https://ror.org/01703db54","country_code":"JP","type":"government","lineage":["https://openalex.org/I73613424"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Peng Chen","raw_affiliation_strings":["National Institute of Advanced Industrial Science and Technology (AIST), Tokyo, Japan","RIKEN Center for Computational Science (R-CCS), Kobe, Hyogo, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Advanced Industrial Science and Technology (AIST), Tokyo, Japan","institution_ids":["https://openalex.org/I73613424"]},{"raw_affiliation_string":"RIKEN Center for Computational Science (R-CCS), Kobe, Hyogo, Japan","institution_ids":["https://openalex.org/I4210129730"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002208999","display_name":"Mohamed Wahib","orcid":"https://orcid.org/0000-0002-7165-2095"},"institutions":[{"id":"https://openalex.org/I4210129730","display_name":"RIKEN Center for Computational Science","ror":"https://ror.org/03r519674","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652","https://openalex.org/I4210129730"]},{"id":"https://openalex.org/I73613424","display_name":"National Institute of Advanced Industrial Science and Technology","ror":"https://ror.org/01703db54","country_code":"JP","type":"government","lineage":["https://openalex.org/I73613424"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Mohamed Wahib","raw_affiliation_strings":["National Institute of Advanced Industrial Science and Technology (AIST), Tokyo, Japan","RIKEN Center for Computational Science (R-CCS), Kobe, Hyogo, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Advanced Industrial Science and Technology (AIST), Tokyo, Japan","institution_ids":["https://openalex.org/I73613424"]},{"raw_affiliation_string":"RIKEN Center for Computational Science (R-CCS), Kobe, Hyogo, Japan","institution_ids":["https://openalex.org/I4210129730"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020388832","display_name":"Bertil Schmidt","orcid":"https://orcid.org/0000-0003-2597-8331"},"institutions":[{"id":"https://openalex.org/I197323543","display_name":"Johannes Gutenberg University Mainz","ror":"https://ror.org/023b0x485","country_code":"DE","type":"education","lineage":["https://openalex.org/I197323543"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Bertil Schmidt","raw_affiliation_strings":["Institute of Computer Science, Johannes Gutenberg University Mainz, Mainz, Germany"],"affiliations":[{"raw_affiliation_string":"Institute of Computer Science, Johannes Gutenberg University Mainz, Mainz, Germany","institution_ids":["https://openalex.org/I197323543"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100411431","display_name":"Xiao Wang","orcid":"https://orcid.org/0000-0001-6545-1943"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiao Wang","raw_affiliation_strings":["Oak Ridge National Laboratory, Oak Ridge, TN, USA"],"affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory, Oak Ridge, TN, USA","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110799964","display_name":"Haidong Lan","orcid":"https://orcid.org/0000-0002-5231-4015"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haidong Lan","raw_affiliation_strings":["Tencent AI Lab, Shenzhen, Guangdong, China"],"affiliations":[{"raw_affiliation_string":"Tencent AI Lab, Shenzhen, Guangdong, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051506217","display_name":"Du Wu","orcid":"https://orcid.org/0000-0002-4002-0837"},"institutions":[{"id":"https://openalex.org/I4210145761","display_name":"Shenzhen Institutes of Advanced Technology","ror":"https://ror.org/04gh4er46","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210145761"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dou Wu","raw_affiliation_strings":["Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Shenzhen, Guangdong, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Shenzhen, Guangdong, China","institution_ids":["https://openalex.org/I4210145761"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083508620","display_name":"Minwen Deng","orcid":"https://orcid.org/0009-0005-8122-3023"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Minwen Deng","raw_affiliation_strings":["Tencent AI Lab, Shenzhen, Guangdong, China"],"affiliations":[{"raw_affiliation_string":"Tencent AI Lab, Shenzhen, Guangdong, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037823825","display_name":"Yanjie Wei","orcid":"https://orcid.org/0000-0002-4791-7540"},"institutions":[{"id":"https://openalex.org/I4210145761","display_name":"Shenzhen Institutes of Advanced Technology","ror":"https://ror.org/04gh4er46","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210145761"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanjie Wei","raw_affiliation_strings":["Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Shenzhen, Guangdong, China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Institutes of Advanced Technology, Chinese Academy of Sciences, Shenzhen, Guangdong, China","institution_ids":["https://openalex.org/I4210145761"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101952645","display_name":"Shengzhong Feng","orcid":"https://orcid.org/0000-0002-6225-2815"},"institutions":[{"id":"https://openalex.org/I4210112812","display_name":"National Supercomputing Center in Shenzhen","ror":"https://ror.org/02291hh73","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210112812"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shengzhong Feng","raw_affiliation_strings":["National Supercomputer Center in Shenzhen, Shenzhen, Guangdong, China"],"affiliations":[{"raw_affiliation_string":"National Supercomputer Center in Shenzhen, Shenzhen, Guangdong, China","institution_ids":["https://openalex.org/I4210112812"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5056275783"],"corresponding_institution_ids":["https://openalex.org/I4210145761"],"apc_list":null,"apc_paid":null,"fwci":1.6333,"has_fulltext":true,"cited_by_count":16,"citation_normalized_percentile":{"value":0.84393834,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"33","issue":"11","first_page":"2885","last_page":"2899"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9943000078201294,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.9243372678756714},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8202592134475708},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7263875007629395},{"id":"https://openalex.org/keywords/software-portability","display_name":"Software portability","score":0.6698026061058044},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.6109753847122192},{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.5568260550498962},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.5005154609680176},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.42923402786254883},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4200270473957062},{"id":"https://openalex.org/keywords/loop-unrolling","display_name":"Loop unrolling","score":0.41561299562454224},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.41220685839653015},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.38462814688682556},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.348978728055954},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.3457614779472351},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.25970977544784546},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.19525909423828125},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10923761129379272}],"concepts":[{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.9243372678756714},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8202592134475708},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7263875007629395},{"id":"https://openalex.org/C63000827","wikidata":"https://www.wikidata.org/wiki/Q3080428","display_name":"Software portability","level":2,"score":0.6698026061058044},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.6109753847122192},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.5568260550498962},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.5005154609680176},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.42923402786254883},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4200270473957062},{"id":"https://openalex.org/C76970557","wikidata":"https://www.wikidata.org/wiki/Q1869750","display_name":"Loop unrolling","level":3,"score":0.41561299562454224},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.41220685839653015},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38462814688682556},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.348978728055954},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.3457614779472351},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.25970977544784546},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.19525909423828125},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10923761129379272},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tpds.2022.3146257","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2022.3146257","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},{"id":"pmh:oai:osti.gov:1863284","is_oa":true,"landing_page_url":"https://www.osti.gov/biblio/1863284","pdf_url":"https://www.osti.gov/servlets/purl/1863284","source":{"id":"https://openalex.org/S4306402487","display_name":"OSTI OAI (U.S. Department of Energy Office of Scientific and Technical Information)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I139351228","host_organization_name":"Office of Scientific and Technical Information","host_organization_lineage":["https://openalex.org/I139351228"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null}],"best_oa_location":{"id":"pmh:oai:osti.gov:1863284","is_oa":true,"landing_page_url":"https://www.osti.gov/biblio/1863284","pdf_url":"https://www.osti.gov/servlets/purl/1863284","source":{"id":"https://openalex.org/S4306402487","display_name":"OSTI OAI (U.S. Department of Energy Office of Scientific and Technical Information)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I139351228","host_organization_name":"Office of Scientific and Technical Information","host_organization_lineage":["https://openalex.org/I139351228"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1121271761","display_name":null,"funder_award_id":"Program","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1213624959","display_name":null,"funder_award_id":"2011DP173015","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G1231421488","display_name":null,"funder_award_id":"under","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G186330105","display_name":"Geometric Aspects of Operator Space Theory","funder_award_id":"0200714","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G1992415837","display_name":null,"funder_award_id":"131060","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2042897603","display_name":null,"funder_award_id":"DE-AC05-00OR2272","funder_id":"https://openalex.org/F4320316892","funder_display_name":"UT-Battelle"},{"id":"https://openalex.org/G2075189144","display_name":null,"funder_award_id":"DE-AC05-00OR22725","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2100062414","display_name":"STRUCTURAL RELATIONS BETWEEN THE MID-PACIFIC OCEANIC  RIDGES AND FRACTURE ZONES","funder_award_id":"7141147","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G2298714541","display_name":null,"funder_award_id":"2020071","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G2981938667","display_name":null,"funder_award_id":"Shenzhen","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3346161132","display_name":null,"funder_award_id":"2018YFB0204403","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3417309122","display_name":"RAPID: Ethnographic field study of engineering  knowledge systems: Developing a  social-process-centric situated model","funder_award_id":"1642022","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3719401238","display_name":null,"funder_award_id":"1813203","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G37568934","display_name":null,"funder_award_id":"Grant","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G384178317","display_name":null,"funder_award_id":"02008","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4020255992","display_name":null,"funder_award_id":"Project","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5726405315","display_name":null,"funder_award_id":"DE-AC05","funder_id":"https://openalex.org/F4320306250","funder_display_name":"Battelle"},{"id":"https://openalex.org/G5750115239","display_name":null,"funder_award_id":"U1813203","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6129992089","display_name":null,"funder_award_id":"DE-AC05-","funder_id":"https://openalex.org/F4320316892","funder_display_name":"UT-Battelle"},{"id":"https://openalex.org/G6495930337","display_name":null,"funder_award_id":"DE-AC05-00OR22725","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6750695397","display_name":"RI: Small: Representation Learning for Semantic Mapping and Safe Robot Navigation","funder_award_id":"2007141","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6864165199","display_name":null,"funder_award_id":"DE-AC05-00OR22725","funder_id":"https://openalex.org/F4320306250","funder_display_name":"Battelle"},{"id":"https://openalex.org/G7440325224","display_name":"Collaborative Research: Connecting Professional and Educational Communities to Prepare Construction Engineering Students for the Workplace","funder_award_id":"2201642","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7726157001","display_name":null,"funder_award_id":"Grant No.","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7924219151","display_name":null,"funder_award_id":"YX202007","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8161904097","display_name":null,"funder_award_id":"202008","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8367034557","display_name":"An Analog of Extremal Length (Mathematical Sciences)","funder_award_id":"8201131","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G839331110","display_name":null,"funder_award_id":"TD202008","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8813984943","display_name":null,"funder_award_id":"DE-AC05-00OR22725","funder_id":"https://openalex.org/F4320316892","funder_display_name":"UT-Battelle"},{"id":"https://openalex.org/G8943143067","display_name":null,"funder_award_id":"AC05-00OR22725","funder_id":"https://openalex.org/F4320316892","funder_display_name":"UT-Battelle"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"},{"id":"https://openalex.org/F4320306250","display_name":"Battelle","ror":"https://ror.org/01h5tnr73"},{"id":"https://openalex.org/F4320316892","display_name":"UT-Battelle","ror":"https://ror.org/04nza6677"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4210559098.pdf"},"referenced_works_count":53,"referenced_works":["https://openalex.org/W404524222","https://openalex.org/W1563795667","https://openalex.org/W2002555321","https://openalex.org/W2043275593","https://openalex.org/W2073061372","https://openalex.org/W2111619626","https://openalex.org/W2119144962","https://openalex.org/W2138215414","https://openalex.org/W2154865023","https://openalex.org/W2170866695","https://openalex.org/W2172654076","https://openalex.org/W2279098554","https://openalex.org/W2280574045","https://openalex.org/W2499931820","https://openalex.org/W2620838383","https://openalex.org/W2736099364","https://openalex.org/W2738029666","https://openalex.org/W2741554038","https://openalex.org/W2768696376","https://openalex.org/W2786374423","https://openalex.org/W2787513823","https://openalex.org/W2804500013","https://openalex.org/W2809021737","https://openalex.org/W2951894856","https://openalex.org/W2963973518","https://openalex.org/W2964350391","https://openalex.org/W2971383048","https://openalex.org/W2998957070","https://openalex.org/W3008378296","https://openalex.org/W3011121930","https://openalex.org/W3024332882","https://openalex.org/W3038006402","https://openalex.org/W3113046600","https://openalex.org/W3118616291","https://openalex.org/W3133174490","https://openalex.org/W3164323614","https://openalex.org/W3210601829","https://openalex.org/W4244254628","https://openalex.org/W4251637954","https://openalex.org/W4288359923","https://openalex.org/W4295312788","https://openalex.org/W4302296459","https://openalex.org/W6633802082","https://openalex.org/W6637151318","https://openalex.org/W6677580257","https://openalex.org/W6695314431","https://openalex.org/W6741502269","https://openalex.org/W6748408460","https://openalex.org/W6752057402","https://openalex.org/W6762060668","https://openalex.org/W6766978945","https://openalex.org/W6774015895","https://openalex.org/W7011181696"],"related_works":["https://openalex.org/W2058965144","https://openalex.org/W2164382479","https://openalex.org/W107105315","https://openalex.org/W2146343568","https://openalex.org/W98480971","https://openalex.org/W2150291671","https://openalex.org/W2121492601","https://openalex.org/W3157543420","https://openalex.org/W1188123746","https://openalex.org/W2352677973"],"abstract_inverted_index":{"We":[0],"present":[1],"<italic":[2],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[3],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">FastConv</i>":[4],",":[5],"a":[6,50,103,144],"template-based":[7],"code":[8],"auto-generation":[9],"open-source":[10],"library":[11],"that":[12,184],"can":[13],"automatically":[14],"generate":[15],"high-performance":[16],"deep":[17],"learning":[18],"convolution":[19,74,77,181],"kernels":[20,95],"of":[21,43,53,68,79,93,114,120,220],"arbitrary":[22],"matrices/tensors":[23],"shapes.":[24,81],"FastConv":[25,82,107,185],"is":[26,33,131,164],"based":[27],"on":[28,172,205,212,228],"the":[29,35,40,66,111,135,140],"Winograd":[30,204],"algorithm,":[31],"which":[32],"reportedly":[34],"highest":[36],"performing":[37],"algorithm":[38],"for":[39,76,98,110,139,143],"time-consuming":[41],"layers":[42,78],"convolutional":[44],"neural":[45],"networks.":[46],"ARM":[47,168,199],"CPUs":[48],"cover":[49],"wide":[51],"range":[52],"designs":[54],"and":[55,127,148,159,161,170,191,198,226,237],"specifications,":[56],"from":[57],"embedded":[58],"devices":[59],"to":[60,65,70,89,133,154,157,188,193],"HPC-grade":[61],"CPUs.":[62],"The":[63],"leads":[64],"dilemma":[67],"how":[69],"consistently":[71],"optimize":[72],"Winograd-based":[73],"solvers":[75],"different":[80],"addresses":[83],"this":[84],"problem":[85,149],"by":[86],"using":[87,203],"templates":[88],"auto-generate":[90],"multiple":[91],"shapes":[92,182],"tuned":[94],"variants":[96],"suitable":[97],"skinny":[99],"tall":[100],"matrices.":[101],"As":[102],"performance":[104,142,176,209],"portable":[105],"library,":[106],"transparently":[108],"searches":[109],"best":[112,141],"combination":[113],"kernel":[115],"shapes,":[116],"cache":[117],"tiles,":[118],"scheduling":[119],"loop":[121],"orders,":[122],"packing":[123],"strategies,":[124],"access":[125],"patterns,":[126],"online/offline":[128],"computations.":[129],"Auto-tuning":[130],"used":[132],"search":[134],"parameter":[136],"configuration":[137],"space":[138],"given":[145],"target":[146],"architecture":[147],"size.":[150],"Results":[151],"show":[152,183,214],"1.02x":[153],"1.40x,":[155],"1.14x":[156],"2.17x,":[158],"1.22x":[160],"2.48x":[162],"speedup":[163,190,195,217],"achieved":[165],"over":[166,196,218],"NNPACK,":[167],"NN,":[169],"FeatherCNN":[171],"Kunpeng":[173,206,229],"920.":[174,207],"Furthermore,":[175],"portability":[177,210],"experiments":[178],"with":[179],"various":[180],"achieves":[186],"1.2x":[187],"1.7x":[189],"2x":[192],"22x":[194],"NNPACK":[197,219],"NN":[200],"inference":[201],"engine":[202],"CPU":[208],"evaluation":[211],"VGG\u201316":[213],"an":[215],"average":[216],"1.42x,":[221],"1.21x,":[222],"1.26x,":[223],"1.37x,":[224],"2.26x,":[225],"11.02x":[227],"920,":[230],"Snapdragon":[231],"835,":[232],"855,":[233],"888,":[234],"Apple":[235],"M1,":[236],"AWS":[238],"Graviton2,":[239],"respectively.":[240]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":1}],"updated_date":"2026-04-18T07:56:08.524223","created_date":"2025-10-10T00:00:00"}
