{"id":"https://openalex.org/W4321446149","doi":"https://doi.org/10.1145/3572848.3577435","title":"Efficient Direct Convolution Using Long SIMD Instructions","display_name":"Efficient Direct Convolution Using Long SIMD Instructions","publication_year":2023,"publication_date":"2023-02-21","ids":{"openalex":"https://openalex.org/W4321446149","doi":"https://doi.org/10.1145/3572848.3577435"},"language":"en","primary_location":{"id":"doi:10.1145/3572848.3577435","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3572848.3577435","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 28th ACM SIGPLAN Annual Symposium on Principles and Practice of Parallel Programming","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://upcommons.upc.edu/bitstreams/43600b99-dc5a-4820-9cde-4a77ccc020f3/download","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006969445","display_name":"Alexandre Santana","orcid":"https://orcid.org/0000-0002-3203-3662"},"institutions":[{"id":"https://openalex.org/I2799803557","display_name":"Barcelona Supercomputing Center","ror":"https://ror.org/05sd8tv96","country_code":"ES","type":"facility","lineage":["https://openalex.org/I2799803557","https://openalex.org/I9617848"]},{"id":"https://openalex.org/I9617848","display_name":"Universitat Polit\u00e8cnica de Catalunya","ror":"https://ror.org/03mb6wj31","country_code":"ES","type":"education","lineage":["https://openalex.org/I9617848"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Alexandre de Limas Santana","raw_affiliation_strings":["Barcelona Supercomputing Center, Barcelona, Catalunya, Spain and Universitat Polit\u00e8cnica de Catalunya, Barcelona, Catalunya, Spain"],"raw_orcid":"https://orcid.org/0000-0002-3203-3662","affiliations":[{"raw_affiliation_string":"Barcelona Supercomputing Center, Barcelona, Catalunya, Spain and Universitat Polit\u00e8cnica de Catalunya, Barcelona, Catalunya, Spain","institution_ids":["https://openalex.org/I2799803557","https://openalex.org/I9617848"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049697257","display_name":"Adri\u00e0 Armejach","orcid":"https://orcid.org/0000-0003-2869-668X"},"institutions":[{"id":"https://openalex.org/I2799803557","display_name":"Barcelona Supercomputing Center","ror":"https://ror.org/05sd8tv96","country_code":"ES","type":"facility","lineage":["https://openalex.org/I2799803557","https://openalex.org/I9617848"]},{"id":"https://openalex.org/I9617848","display_name":"Universitat Polit\u00e8cnica de Catalunya","ror":"https://ror.org/03mb6wj31","country_code":"ES","type":"education","lineage":["https://openalex.org/I9617848"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Adri\u00e0 Armejach","raw_affiliation_strings":["Barcelona Supercomputing Center, Barcelona, Catalunya, Spain and Universitat Polit\u00e8cnica de Catalunya, Barcelona, Catalunya, Spain"],"raw_orcid":"https://orcid.org/0000-0003-2869-668X","affiliations":[{"raw_affiliation_string":"Barcelona Supercomputing Center, Barcelona, Catalunya, Spain and Universitat Polit\u00e8cnica de Catalunya, Barcelona, Catalunya, Spain","institution_ids":["https://openalex.org/I2799803557","https://openalex.org/I9617848"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044183257","display_name":"Marc Casas","orcid":"https://orcid.org/0000-0003-4564-2093"},"institutions":[{"id":"https://openalex.org/I2799803557","display_name":"Barcelona Supercomputing Center","ror":"https://ror.org/05sd8tv96","country_code":"ES","type":"facility","lineage":["https://openalex.org/I2799803557","https://openalex.org/I9617848"]},{"id":"https://openalex.org/I9617848","display_name":"Universitat Polit\u00e8cnica de Catalunya","ror":"https://ror.org/03mb6wj31","country_code":"ES","type":"education","lineage":["https://openalex.org/I9617848"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Marc Casas","raw_affiliation_strings":["Barcelona Supercomputing Center, Barcelona, Catalunya, Spain and Universitat Polit\u00e8cnica de Catalunya, Barcelona, Catalunya, Spain"],"raw_orcid":"https://orcid.org/0000-0003-4564-2093","affiliations":[{"raw_affiliation_string":"Barcelona Supercomputing Center, Barcelona, Catalunya, Spain and Universitat Polit\u00e8cnica de Catalunya, Barcelona, Catalunya, Spain","institution_ids":["https://openalex.org/I2799803557","https://openalex.org/I9617848"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5006969445"],"corresponding_institution_ids":["https://openalex.org/I2799803557","https://openalex.org/I9617848"],"apc_list":null,"apc_paid":null,"fwci":1.3898,"has_fulltext":true,"cited_by_count":12,"citation_normalized_percentile":{"value":0.83075439,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"342","last_page":"353"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.9504860639572144},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.856501042842865},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7942972183227539},{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.6662113666534424},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.6423989534378052},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.5395336747169495},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5037750601768494},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.49790000915527344},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.21810418367385864},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08282631635665894},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.07296311855316162}],"concepts":[{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.9504860639572144},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.856501042842865},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7942972183227539},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.6662113666534424},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.6423989534378052},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.5395336747169495},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5037750601768494},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.49790000915527344},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.21810418367385864},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08282631635665894},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.07296311855316162},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3572848.3577435","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3572848.3577435","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 28th ACM SIGPLAN Annual Symposium on Principles and Practice of Parallel Programming","raw_type":"proceedings-article"},{"id":"pmh:oai:upcommons.upc.edu:2117/387544","is_oa":true,"landing_page_url":"https://hdl.handle.net/2117/387544","pdf_url":"https://upcommons.upc.edu/bitstreams/43600b99-dc5a-4820-9cde-4a77ccc020f3/download","source":{"id":"https://openalex.org/S4377196262","display_name":"UPCommons institutional repository (Universitat Polit\u00e8cnica de Catalunya)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I9617848","host_organization_name":"Universitat Polit\u00e8cnica de Catalunya","host_organization_lineage":["https://openalex.org/I9617848"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Conference report"},{"id":"pmh:oai:upcommons.upc.edu:2117/385749","is_oa":true,"landing_page_url":"http://hdl.handle.net/2117/385749","pdf_url":null,"source":{"id":"https://openalex.org/S4377196262","display_name":"UPCommons institutional repository (Universitat Polit\u00e8cnica de Catalunya)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I9617848","host_organization_name":"Universitat Polit\u00e8cnica de Catalunya","host_organization_lineage":["https://openalex.org/I9617848"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/submittedVersion"}],"best_oa_location":{"id":"pmh:oai:upcommons.upc.edu:2117/387544","is_oa":true,"landing_page_url":"https://hdl.handle.net/2117/387544","pdf_url":"https://upcommons.upc.edu/bitstreams/43600b99-dc5a-4820-9cde-4a77ccc020f3/download","source":{"id":"https://openalex.org/S4377196262","display_name":"UPCommons institutional repository (Universitat Polit\u00e8cnica de Catalunya)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I9617848","host_organization_name":"Universitat Polit\u00e8cnica de Catalunya","host_organization_lineage":["https://openalex.org/I9617848"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Conference report"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/1","score":0.47999998927116394,"display_name":"No poverty"}],"awards":[{"id":"https://openalex.org/G1599646647","display_name":null,"funder_award_id":"PID2019","funder_id":"https://openalex.org/F4320338080","funder_display_name":"European Social Fund"},{"id":"https://openalex.org/G1643949827","display_name":null,"funder_award_id":"AEI/10.13039/501100011033","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G2064873908","display_name":null,"funder_award_id":"10.13039/501100011033","funder_id":"https://openalex.org/F4320321505","funder_display_name":"Generalitat de Catalunya"},{"id":"https://openalex.org/G2262748287","display_name":null,"funder_award_id":"501100011033","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G3005324705","display_name":null,"funder_award_id":"501100011033","funder_id":"https://openalex.org/F4320321505","funder_display_name":"Generalitat de Catalunya"},{"id":"https://openalex.org/G300979063","display_name":null,"funder_award_id":"10.13039/501100011033","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G3219925899","display_name":null,"funder_award_id":"MCIN/AEI/10.13039/501100011033","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G3480869486","display_name":null,"funder_award_id":"13039","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G4230454509","display_name":null,"funder_award_id":"IJCI-2017-33945","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G4305436266","display_name":null,"funder_award_id":"MCIN/AEI/10.13039/501100011033","funder_id":"https://openalex.org/F4320338080","funder_display_name":"European Social Fund"},{"id":"https://openalex.org/G440774710","display_name":null,"funder_award_id":"RYC-2017-23269","funder_id":"https://openalex.org/F4320321505","funder_display_name":"Generalitat de Catalunya"},{"id":"https://openalex.org/G4431839286","display_name":null,"funder_award_id":"PID2019-107255GB","funder_id":"https://openalex.org/F4320323737","funder_display_name":"Ministerio de Ciencia y Tecnolog\u00eda"},{"id":"https://openalex.org/G4976336944","display_name":null,"funder_award_id":"MCIN/AEI/10.13039/501100011033","funder_id":"https://openalex.org/F4320321505","funder_display_name":"Generalitat de Catalunya"},{"id":"https://openalex.org/G5080475149","display_name":null,"funder_award_id":"10.13039","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G6218905885","display_name":null,"funder_award_id":"501100011033","funder_id":"https://openalex.org/F4320338080","funder_display_name":"European Social Fund"},{"id":"https://openalex.org/G6396138059","display_name":null,"funder_award_id":"AEI/10.13039/501100011033","funder_id":"https://openalex.org/F4320323737","funder_display_name":"Ministerio de Ciencia y Tecnolog\u00eda"},{"id":"https://openalex.org/G6456004180","display_name":null,"funder_award_id":"2017-SGR-1414","funder_id":"https://openalex.org/F4320321505","funder_display_name":"Generalitat de Catalunya"},{"id":"https://openalex.org/G6833763678","display_name":null,"funder_award_id":"PID2019-107255GB","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G697566953","display_name":null,"funder_award_id":"MCIN/AEI/10.13039/501100011033","funder_id":"https://openalex.org/F4320323737","funder_display_name":"Ministerio de Ciencia y Tecnolog\u00eda"},{"id":"https://openalex.org/G7084143925","display_name":null,"funder_award_id":"AEI/10","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G7266728691","display_name":null,"funder_award_id":"13039/501100011033","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G7425137858","display_name":null,"funder_award_id":"PID2019","funder_id":"https://openalex.org/F4320321505","funder_display_name":"Generalitat de Catalunya"},{"id":"https://openalex.org/G7535663061","display_name":null,"funder_award_id":"AEI/10.","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G8684520763","display_name":null,"funder_award_id":"PID2019","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G8888022036","display_name":null,"funder_award_id":"AEI/10.13039/501100011033","funder_id":"https://openalex.org/F4320338080","funder_display_name":"European Social Fund"}],"funders":[{"id":"https://openalex.org/F4320321505","display_name":"Generalitat de Catalunya","ror":"https://ror.org/01bg62x04"},{"id":"https://openalex.org/F4320323737","display_name":"Ministerio de Ciencia y Tecnolog\u00eda","ror":"https://ror.org/034900433"},{"id":"https://openalex.org/F4320335598","display_name":"Agencia Estatal de Investigaci\u00f3n","ror":null},{"id":"https://openalex.org/F4320338080","display_name":"European Social Fund","ror":"https://ror.org/00k4n6c32"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4321446149.pdf"},"referenced_works_count":20,"referenced_works":["https://openalex.org/W1884620995","https://openalex.org/W1988888548","https://openalex.org/W2056971515","https://openalex.org/W2097117768","https://openalex.org/W2155893237","https://openalex.org/W2194775991","https://openalex.org/W2442974303","https://openalex.org/W2604319603","https://openalex.org/W2605739168","https://openalex.org/W2612094043","https://openalex.org/W2613264175","https://openalex.org/W2951894856","https://openalex.org/W2970971581","https://openalex.org/W3123542955","https://openalex.org/W4244254628","https://openalex.org/W6600113797","https://openalex.org/W6600120041","https://openalex.org/W6628980590","https://openalex.org/W6672970576","https://openalex.org/W6829887170"],"related_works":["https://openalex.org/W1637649405","https://openalex.org/W1585350690","https://openalex.org/W2008876287","https://openalex.org/W2078690603","https://openalex.org/W4245302940","https://openalex.org/W2074226157","https://openalex.org/W1579918296","https://openalex.org/W4321446149","https://openalex.org/W2094969226","https://openalex.org/W1606851719"],"abstract_inverted_index":{"This":[0],"paper":[1],"demonstrates":[2],"that":[3,126],"state-of-the-art":[4,36,105,140],"proposals":[5],"to":[6,24,33,40,78,94,138,149],"compute":[7],"convolutions":[8],"on":[9,55,111],"architectures":[10,41],"with":[11,116],"CPUs":[12,115],"supporting":[13],"SIMD":[14,21,37,44,53,118],"instructions":[15,45],"deliver":[16],"poor":[17],"performance":[18],"for":[19,142],"long":[20,43],"lengths":[22],"due":[23],"frequent":[25],"cache":[26,80],"conflict":[27],"misses.":[28],"We":[29,100],"first":[30],"discuss":[31],"how":[32],"adapt":[34],"the":[35,48,52,56,66,73,83,90,96,104,139,150],"direct":[38],"convolution":[39],"using":[42,120],"and":[46,82,107,128,135,144,146],"analyze":[47],"implications":[49],"of":[50,75,133],"increasing":[51],"length":[54],"algorithm":[57],"formulation.":[58],"Next,":[59],"we":[60],"propose":[61],"two":[62],"new":[63],"algorithmic":[64],"approaches:":[65],"Bounded":[67],"Direct":[68,85],"Convolution":[69,86],"(BDC),":[70],"which":[71,88],"adapts":[72],"amount":[74],"computation":[76],"exposed":[77],"mitigate":[79],"misses,":[81],"Multi-Block":[84],"(MBDC),":[87],"redefines":[89],"activation":[91],"memory":[92,97],"layout":[93],"improve":[95],"access":[98],"pattern.":[99],"evaluate":[101],"BDC,":[102],"MBDC,":[103],"technique,":[106],"a":[108],"proprietary":[109,151],"library":[110],"an":[112],"architecture":[113],"featuring":[114],"16,384-bit":[117],"registers":[119],"ResNet":[121],"convolutions.":[122],"Our":[123],"results":[124],"show":[125],"BDC":[127],"MBDC":[129],"achieve":[130],"respective":[131],"speed-ups":[132],"1.44\u00d7":[134],"1.28\u00d7":[136],"compared":[137,148],"technique":[141],"ResNet-101,":[143],"1.83\u00d7":[145],"1.63\u00d7":[147],"library.":[152]},"counts_by_year":[{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":2}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2023-02-22T00:00:00"}
