{"id":"https://openalex.org/W2978452617","doi":"https://doi.org/10.1109/ijcnn.2019.8852012","title":"Parallel convolution algorithm using implicit matrix multiplication on multi-core CPUs","display_name":"Parallel convolution algorithm using implicit matrix multiplication on multi-core CPUs","publication_year":2019,"publication_date":"2019-07-01","ids":{"openalex":"https://openalex.org/W2978452617","doi":"https://doi.org/10.1109/ijcnn.2019.8852012","mag":"2978452617"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn.2019.8852012","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn.2019.8852012","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101913669","display_name":"Qinglin Wang","orcid":"https://orcid.org/0000-0002-8286-6566"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Qinglin Wang","raw_affiliation_strings":["Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066267776","display_name":"Songzhu Mei","orcid":"https://orcid.org/0000-0002-4926-5953"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Songzhu Mei","raw_affiliation_strings":["Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100454211","display_name":"Jie Liu","orcid":"https://orcid.org/0000-0003-3745-7541"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jie Liu","raw_affiliation_strings":["Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5061134505","display_name":"Chunye Gong","orcid":"https://orcid.org/0000-0003-0349-1100"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chunye Gong","raw_affiliation_strings":["Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"Science and Technology on Parallel and Distributed Processing Laboratory, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101913669"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":0.6073,"has_fulltext":false,"cited_by_count":20,"citation_normalized_percentile":{"value":0.72666283,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7817544341087341},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7283859848976135},{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.7208454012870789},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.7099156975746155},{"id":"https://openalex.org/keywords/multiplication","display_name":"Multiplication (music)","score":0.6964749097824097},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5482608675956726},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4848445653915405},{"id":"https://openalex.org/keywords/matrix","display_name":"Matrix (chemical analysis)","score":0.4848402738571167},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.4784238934516907},{"id":"https://openalex.org/keywords/multiplication-algorithm","display_name":"Multiplication algorithm","score":0.45577195286750793},{"id":"https://openalex.org/keywords/parallel-algorithm","display_name":"Parallel algorithm","score":0.41601794958114624},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.26585185527801514},{"id":"https://openalex.org/keywords/arithmetic","display_name":"Arithmetic","score":0.22795233130455017},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.16025730967521667},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.13826870918273926},{"id":"https://openalex.org/keywords/discrete-mathematics","display_name":"Discrete mathematics","score":0.08372935652732849}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7817544341087341},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7283859848976135},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.7208454012870789},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.7099156975746155},{"id":"https://openalex.org/C2780595030","wikidata":"https://www.wikidata.org/wiki/Q3860309","display_name":"Multiplication (music)","level":2,"score":0.6964749097824097},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5482608675956726},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4848445653915405},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.4848402738571167},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.4784238934516907},{"id":"https://openalex.org/C201290732","wikidata":"https://www.wikidata.org/wiki/Q130762","display_name":"Multiplication algorithm","level":3,"score":0.45577195286750793},{"id":"https://openalex.org/C120373497","wikidata":"https://www.wikidata.org/wiki/Q1087987","display_name":"Parallel algorithm","level":2,"score":0.41601794958114624},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.26585185527801514},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.22795233130455017},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.16025730967521667},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.13826870918273926},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.08372935652732849},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C84114770","wikidata":"https://www.wikidata.org/wiki/Q46344","display_name":"Quantum","level":2,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C48372109","wikidata":"https://www.wikidata.org/wiki/Q3913","display_name":"Binary number","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn.2019.8852012","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn.2019.8852012","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Affordable and clean energy","score":0.7099999785423279,"id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1530262073","https://openalex.org/W1667652561","https://openalex.org/W1686810756","https://openalex.org/W2043275593","https://openalex.org/W2073061372","https://openalex.org/W2097117768","https://openalex.org/W2155893237","https://openalex.org/W2163605009","https://openalex.org/W2186615578","https://openalex.org/W2346804249","https://openalex.org/W2605739168","https://openalex.org/W2670151302","https://openalex.org/W2754249189","https://openalex.org/W2803425801","https://openalex.org/W2810130280","https://openalex.org/W2953384591","https://openalex.org/W2953879634","https://openalex.org/W2963887620","https://openalex.org/W4302296459","https://openalex.org/W6631660994","https://openalex.org/W6637151318","https://openalex.org/W6637373629","https://openalex.org/W6684191040","https://openalex.org/W6686509673","https://openalex.org/W6713134421","https://openalex.org/W6740373327","https://openalex.org/W6744651773"],"related_works":["https://openalex.org/W2017990332","https://openalex.org/W2080337923","https://openalex.org/W1488776355","https://openalex.org/W2596457687","https://openalex.org/W4287593139","https://openalex.org/W3099313426","https://openalex.org/W752783541","https://openalex.org/W1516189266","https://openalex.org/W2093666864","https://openalex.org/W4378602953"],"abstract_inverted_index":{"Convolution":[0],"neural":[1],"networks":[2],"(CNNs)":[3],"have":[4],"been":[5],"extensively":[6],"used":[7],"in":[8,131],"machine":[9],"learning":[10],"applications.":[11],"The":[12,58,108],"most":[13,132],"time-consuming":[14],"part":[15],"of":[16,46,67],"CNNs":[17],"are":[18,42,70],"convolution":[19,26,68,82],"operations.":[20],"A":[21],"common":[22],"approach":[23],"to":[24,29],"implementing":[25],"operations":[27,69],"is":[28,50,56,60],"recast":[30],"them":[31],"as":[32,37],"general":[33],"matrix":[34,86],"multiplication,":[35],"known":[36],"the":[38,61,64,100,105,128],"im2col+GEMM":[39,129],"approach.":[40,48],"There":[41],"two":[43,112],"main":[44],"drawbacks":[45],"this":[47,75],"One":[49],"that":[51,117],"large":[52],"additional":[53],"memory":[54,101],"space":[55],"required.":[57],"other":[59],"packing":[62,106],"on":[63,88,111],"input":[65],"elements":[66],"not":[71],"memory-efficient":[72],"enough.":[73],"In":[74,91],"paper,":[76],"we":[77],"present":[78],"a":[79],"new":[80,96,119],"parallel":[81],"algorithm":[83,97,120],"using":[84],"implicit":[85],"multiplication":[87],"multi-core":[89,114],"CPUs.":[90],"comparison":[92],"with":[93],"Im2col+GEMM,":[94],"our":[95,118],"can":[98],"reduce":[99],"footprints":[102],"and":[103,125],"improve":[104],"efficiency.":[107],"experiment":[109],"results":[110],"ARV8-based":[113],"CPUs":[115],"demonstrate":[116],"gives":[121],"much":[122],"better":[123],"performance":[124],"scalability":[126],"than":[127],"method":[130],"cases.":[133]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
