{"id":"https://openalex.org/W4388031352","doi":"https://doi.org/10.1145/3581784.3607107","title":"Optimizing Direct Convolutions on ARM Multi-Cores","display_name":"Optimizing Direct Convolutions on ARM Multi-Cores","publication_year":2023,"publication_date":"2023-10-30","ids":{"openalex":"https://openalex.org/W4388031352","doi":"https://doi.org/10.1145/3581784.3607107"},"language":"en","primary_location":{"id":"doi:10.1145/3581784.3607107","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3581784.3607107","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5092188956","display_name":"Pengyu Wang","orcid":"https://orcid.org/0000-0003-2805-0862"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Pengyu Wang","raw_affiliation_strings":["College of Computer Science and Technology, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078057947","display_name":"Weiling Yang","orcid":"https://orcid.org/0000-0001-7167-4086"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weiling Yang","raw_affiliation_strings":["College of Computer Science and Technology, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083171604","display_name":"Jianbin Fang","orcid":"https://orcid.org/0000-0003-3542-4869"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianbin Fang","raw_affiliation_strings":["College of Computer Science and Technology, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006729432","display_name":"Dezun Dong","orcid":"https://orcid.org/0000-0001-6243-8479"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dezun Dong","raw_affiliation_strings":["College of Computer Science and Technology, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101626850","display_name":"Chun Huang","orcid":"https://orcid.org/0000-0002-0317-8192"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chun Huang","raw_affiliation_strings":["College of Computer Science and Technology, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092019932","display_name":"Peng Zhang","orcid":"https://orcid.org/0000-0001-8364-9793"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peng Zhang","raw_affiliation_strings":["College of Computer Science and Technology, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103218034","display_name":"Tao Tang","orcid":"https://orcid.org/0009-0009-2883-6997"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tao Tang","raw_affiliation_strings":["College of Computer Science and Technology, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100401045","display_name":"Zheng Wang","orcid":"https://orcid.org/0000-0001-6157-0662"},"institutions":[{"id":"https://openalex.org/I130828816","display_name":"University of Leeds","ror":"https://ror.org/024mrxd33","country_code":"GB","type":"education","lineage":["https://openalex.org/I130828816"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Zheng Wang","raw_affiliation_strings":["School of Computing, University of Leeds, Leeds, United Kingdom"],"affiliations":[{"raw_affiliation_string":"School of Computing, University of Leeds, Leeds, United Kingdom","institution_ids":["https://openalex.org/I130828816"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5092188956"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":0.5994,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.69891659,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"13"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9879000186920166,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.8113440275192261},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8105056285858154},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.8010283708572388},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5738584399223328},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.45224061608314514},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.4236447811126709},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.34955161809921265},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3273118734359741},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.0843198299407959},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.06933000683784485}],"concepts":[{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.8113440275192261},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8105056285858154},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.8010283708572388},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5738584399223328},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.45224061608314514},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.4236447811126709},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34955161809921265},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3273118734359741},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0843198299407959},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.06933000683784485},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3581784.3607107","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3581784.3607107","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis","raw_type":"proceedings-article"},{"id":"pmh:oai:eprints.whiterose.ac.uk:202768","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306400854","display_name":"White Rose Research Online (University of Leeds, The University of Sheffield, University of York)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2800616092","host_organization_name":"White Rose University Consortium","host_organization_lineage":["https://openalex.org/I2800616092"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"","raw_type":"Proceedings Paper"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G503725912","display_name":null,"funder_award_id":"61972408","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6714216625","display_name":null,"funder_award_id":"2021YFB0300101","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1005811612","https://openalex.org/W1832693441","https://openalex.org/W2040062623","https://openalex.org/W2073061372","https://openalex.org/W2172654076","https://openalex.org/W2193145675","https://openalex.org/W2194775991","https://openalex.org/W2395611524","https://openalex.org/W2530879419","https://openalex.org/W2531409750","https://openalex.org/W2551151849","https://openalex.org/W2560023338","https://openalex.org/W2605019159","https://openalex.org/W2728256789","https://openalex.org/W2734941459","https://openalex.org/W2786374423","https://openalex.org/W2951894856","https://openalex.org/W2961619211","https://openalex.org/W2969414999","https://openalex.org/W2978452617","https://openalex.org/W2995113061","https://openalex.org/W3008010570","https://openalex.org/W3012249773","https://openalex.org/W3016542674","https://openalex.org/W3042343931","https://openalex.org/W3086476857","https://openalex.org/W3106250896","https://openalex.org/W3123054690","https://openalex.org/W3126707779","https://openalex.org/W3127904641","https://openalex.org/W3156745629","https://openalex.org/W3176085048","https://openalex.org/W4220822006","https://openalex.org/W4244254628","https://openalex.org/W4282959697","https://openalex.org/W4288072573","https://openalex.org/W4311543031","https://openalex.org/W4312069271","https://openalex.org/W4389961221","https://openalex.org/W6600662749","https://openalex.org/W6603808522","https://openalex.org/W6821285518"],"related_works":["https://openalex.org/W4387838477","https://openalex.org/W2067193074","https://openalex.org/W3034421924","https://openalex.org/W2982536526","https://openalex.org/W4386858688","https://openalex.org/W4380302312","https://openalex.org/W3008689640","https://openalex.org/W4390971171","https://openalex.org/W4385338604","https://openalex.org/W3081626085"],"abstract_inverted_index":{"Convolution":[0],"kernels":[1,105],"are":[2,10],"widely":[3],"seen":[4],"in":[5,62],"deep":[6,81],"learning":[7,82],"workloads":[8],"and":[9,64,94,106,138],"often":[11],"responsible":[12],"for":[13,38,44,88],"performance":[14,45,109,134],"bottlenecks.":[15],"Recent":[16],"research":[17],"has":[18],"demonstrated":[19],"that":[20,55,128],"a":[21,50],"direct":[22,39,52],"convolution":[23,29,40,53,104,122],"approach":[24,54],"can":[25],"outperform":[26],"the":[27,74,89,131],"traditional":[28],"implementation":[30],"based":[31],"on":[32,110],"tensor-to-matrix":[33],"conversions.":[34],"However,":[35],"existing":[36],"approaches":[37],"still":[41],"have":[42],"room":[43],"improvement.":[46],"We":[47,96,117],"present":[48],"nDirect,":[49],"new":[51,86],"targets":[56],"ARM-based":[57],"multi-core":[58,114],"CPUs":[59],"commonly":[60],"found":[61],"smartphones":[63],"HPC":[65],"systems.":[66],"nDirect":[67,98,119,129],"is":[68],"designed":[69],"to":[70,102],"be":[71],"compatible":[72],"with":[73],"data":[75,92],"layout":[76],"formats":[77],"used":[78],"by":[79,99],"mainstream":[80],"frameworks":[83],"but":[84],"offers":[85],"optimizations":[87],"computational":[90],"kernel,":[91],"packing,":[93],"parallelization.":[95],"evaluate":[97],"applying":[100],"it":[101],"representative":[103],"demonstrating":[107],"its":[108],"four":[111],"distinct":[112],"ARM":[113],"CPU":[115],"platforms.":[116,139],"compare":[118],"against":[120],"state-of-the-art":[121],"optimization":[123],"techniques.":[124],"Experimental":[125],"results":[126],"show":[127],"gives":[130],"best":[132],"overall":[133],"across":[135],"evaluation":[136],"scenarios":[137]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
