{"id":"https://openalex.org/W4286306090","doi":"https://doi.org/10.23919/ascc56756.2022.9828222","title":"Fast Blockwise Matrix-Matrix Multiplication Using AVX and Prefetching on Shared Memory","display_name":"Fast Blockwise Matrix-Matrix Multiplication Using AVX and Prefetching on Shared Memory","publication_year":2022,"publication_date":"2022-05-04","ids":{"openalex":"https://openalex.org/W4286306090","doi":"https://doi.org/10.23919/ascc56756.2022.9828222"},"language":"en","primary_location":{"id":"doi:10.23919/ascc56756.2022.9828222","is_oa":false,"landing_page_url":"https://doi.org/10.23919/ascc56756.2022.9828222","pdf_url":null,"source":{"id":"https://openalex.org/S4363607827","display_name":"2022 13th Asian Control Conference (ASCC)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 13th Asian Control Conference (ASCC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5013036202","display_name":"Nwe Zin Oo","orcid":null},"institutions":[{"id":"https://openalex.org/I131868736","display_name":"Prince of Songkla University","ror":"https://ror.org/0575ycz84","country_code":"TH","type":"education","lineage":["https://openalex.org/I131868736"]}],"countries":["TH"],"is_corresponding":true,"raw_author_name":"Nwe Zin Oo","raw_affiliation_strings":["Prince of Songkla University,Faculty of Engineering,Department of Computer Engineering,Hat Yai,Thailand","Department of Computer Engineering, Faculty of Engineering, Prince of Songkla University, Hat Yai, Thailand"],"affiliations":[{"raw_affiliation_string":"Prince of Songkla University,Faculty of Engineering,Department of Computer Engineering,Hat Yai,Thailand","institution_ids":["https://openalex.org/I131868736"]},{"raw_affiliation_string":"Department of Computer Engineering, Faculty of Engineering, Prince of Songkla University, Hat Yai, Thailand","institution_ids":["https://openalex.org/I131868736"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079273973","display_name":"Panyayot Chaikan","orcid":"https://orcid.org/0000-0003-1066-2676"},"institutions":[{"id":"https://openalex.org/I131868736","display_name":"Prince of Songkla University","ror":"https://ror.org/0575ycz84","country_code":"TH","type":"education","lineage":["https://openalex.org/I131868736"]}],"countries":["TH"],"is_corresponding":false,"raw_author_name":"Panyayot Chaikan","raw_affiliation_strings":["Prince of Songkla University,Faculty of Engineering,Department of Computer Engineering,Hat Yai,Thailand","Department of Computer Engineering, Faculty of Engineering, Prince of Songkla University, Hat Yai, Thailand"],"affiliations":[{"raw_affiliation_string":"Prince of Songkla University,Faculty of Engineering,Department of Computer Engineering,Hat Yai,Thailand","institution_ids":["https://openalex.org/I131868736"]},{"raw_affiliation_string":"Department of Computer Engineering, Faculty of Engineering, Prince of Songkla University, Hat Yai, Thailand","institution_ids":["https://openalex.org/I131868736"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5013036202"],"corresponding_institution_ids":["https://openalex.org/I131868736"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.06875632,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"448","last_page":"452"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.989300012588501,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.8419906497001648},{"id":"https://openalex.org/keywords/vectorization","display_name":"Vectorization (mathematics)","score":0.7883466482162476},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7440963387489319},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.667499303817749},{"id":"https://openalex.org/keywords/multiplication","display_name":"Multiplication (music)","score":0.6337102651596069},{"id":"https://openalex.org/keywords/intrinsics","display_name":"Intrinsics","score":0.536057710647583},{"id":"https://openalex.org/keywords/memory-management","display_name":"Memory management","score":0.42107564210891724},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.19668513536453247},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.18064239621162415}],"concepts":[{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.8419906497001648},{"id":"https://openalex.org/C41681595","wikidata":"https://www.wikidata.org/wiki/Q7917855","display_name":"Vectorization (mathematics)","level":2,"score":0.7883466482162476},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7440963387489319},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.667499303817749},{"id":"https://openalex.org/C2780595030","wikidata":"https://www.wikidata.org/wiki/Q3860309","display_name":"Multiplication (music)","level":2,"score":0.6337102651596069},{"id":"https://openalex.org/C2908650547","wikidata":"https://www.wikidata.org/wiki/Q20999234","display_name":"Intrinsics","level":2,"score":0.536057710647583},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.42107564210891724},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.19668513536453247},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.18064239621162415},{"id":"https://openalex.org/C84114770","wikidata":"https://www.wikidata.org/wiki/Q46344","display_name":"Quantum","level":2,"score":0.0},{"id":"https://openalex.org/C98986596","wikidata":"https://www.wikidata.org/wiki/Q1143031","display_name":"Semiconductor memory","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.23919/ascc56756.2022.9828222","is_oa":false,"landing_page_url":"https://doi.org/10.23919/ascc56756.2022.9828222","pdf_url":null,"source":{"id":"https://openalex.org/S4363607827","display_name":"2022 13th Asian Control Conference (ASCC)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 13th Asian Control Conference (ASCC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5699999928474426,"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320309823","display_name":"Higher Education Research Promotion","ror":"https://ror.org/02wa0fq92"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W1600301286","https://openalex.org/W2055491819","https://openalex.org/W2187551443","https://openalex.org/W2806900363","https://openalex.org/W2913637428","https://openalex.org/W2914939784","https://openalex.org/W3202268066","https://openalex.org/W4405130464","https://openalex.org/W6635962089","https://openalex.org/W6874790528"],"related_works":["https://openalex.org/W2623389408","https://openalex.org/W3099313426","https://openalex.org/W4287593139","https://openalex.org/W752783541","https://openalex.org/W4312354936","https://openalex.org/W1506547947","https://openalex.org/W2995605830","https://openalex.org/W4206811032","https://openalex.org/W2086123442","https://openalex.org/W2596457687"],"abstract_inverted_index":{"In":[0,22,82],"recent":[1],"multicore":[2],"architectures,":[3],"parallel":[4,23],"computing":[5],"with":[6,97],"vectorization":[7],"emerges":[8],"for":[9,54,142],"mathematical":[10],"calculations":[11,68],"and":[12,50,94,154],"image":[13],"processing":[14],"by":[15,63,76,91,121,159],"exploiting":[16],"Intel":[17,169],"Advanced":[18],"Vector":[19],"Extensions":[20],"(AVX).":[21],"computing,":[24],"the":[25,36,41,45,51,98,136,143,147,155],"performance":[26,156],"of":[27,38,43,47,145],"modern":[28],"processors":[29],"depends":[30],"on":[31,69,102,106,168],"many":[32],"factors,":[33],"such":[34],"as":[35],"amount":[37],"memory":[39,119,123],"storage,":[40],"size":[42,144],"caches,":[44],"number":[46],"available":[48],"processors,":[49],"programming":[52],"methodologies":[53],"efficiency.":[55],"Besides":[56],"that,":[57],"a":[58,133],"hundred":[59],"vector":[60],"intrinsics":[61],"supported":[62],"AVX":[64,93],"can":[65,73,115],"perform":[66],"various":[67],"floating-point":[70],"operations":[71],"that":[72],"be":[74],"optimized":[75],"applying":[77,164],"AVX-256":[78],"or":[79],"AVX-512":[80],"registers.":[81],"this":[83],"paper,":[84],"fast":[85],"recursive":[86],"matrix":[87,137],"multiplication":[88,105,138],"is":[89],"proposed":[90,110],"using":[92],"OpenMP":[95],"combined":[96],"software":[99],"prefetching":[100,113,122,140,165],"method":[101,141],"block-wise":[103],"matrix-matrix":[104],"shared":[107],"memory.":[108],"The":[109],"version":[111],"applied":[112,139],"which":[114],"attempt":[116],"to":[117],"reduce":[118],"latencies":[120],"pages":[124],"in":[125],"advance":[126],"before":[127],"they":[128],"have":[129],"been":[130],"used.":[131],"As":[132],"result,":[134],"when":[135],"8192\u00d78192,":[146],"execution":[148],"time":[149],"was":[150,157],"reduced":[151],"about":[152,160],"22%":[153],"enhanced":[158],"17%":[161],"than":[162],"without":[163],"while":[166],"testing":[167],"core":[170],"i7":[171],"processor.":[172]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
