{"id":"https://openalex.org/W3022509528","doi":"https://doi.org/10.3233/apc200092","title":"On the Performance and Energy Efficiency of Sparse Matrix-Vector Multiplication on FPGAs","display_name":"On the Performance and Energy Efficiency of Sparse Matrix-Vector Multiplication on FPGAs","publication_year":2020,"publication_date":"2020-01-01","ids":{"openalex":"https://openalex.org/W3022509528","doi":"https://doi.org/10.3233/apc200092","mag":"3022509528"},"language":"en","primary_location":{"id":"doi:10.3233/apc200092","is_oa":true,"landing_page_url":"https://doi.org/10.3233/apc200092","pdf_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/APC200092","source":{"id":"https://openalex.org/S4210175178","display_name":"Advances in parallel computing","issn_l":"0927-5452","issn":["0927-5452","1879-808X"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"book series"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Advances in Parallel Computing","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/APC200092","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043327478","display_name":"Panagiotis Mpakos","orcid":"https://orcid.org/0009-0006-5148-6903"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Mpakos Panagiotis","raw_affiliation_strings":["National Technical University of Athens, pmpakos@cslab.ece.ntua.gr, nikela@cslab.ece.ntua.gr, xalverti@cslab.ece.ntua.gr, goumas@cslab.ece.ntua.gr, nkoziris@cslab.ece.ntua.gr"],"affiliations":[{"raw_affiliation_string":"National Technical University of Athens, pmpakos@cslab.ece.ntua.gr, nikela@cslab.ece.ntua.gr, xalverti@cslab.ece.ntua.gr, goumas@cslab.ece.ntua.gr, nkoziris@cslab.ece.ntua.gr","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057776164","display_name":"\u039d\u03b9\u03ba\u03ad\u03bb\u03b1 \u03a0\u03b1\u03c0\u03b1\u03b4\u03bf\u03c0\u03bf\u03cd\u03bb\u03bf\u03c5","orcid":"https://orcid.org/0000-0003-2141-5654"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Papadopoulou Nikela","raw_affiliation_strings":["National Technical University of Athens, pmpakos@cslab.ece.ntua.gr, nikela@cslab.ece.ntua.gr, xalverti@cslab.ece.ntua.gr, goumas@cslab.ece.ntua.gr, nkoziris@cslab.ece.ntua.gr"],"affiliations":[{"raw_affiliation_string":"National Technical University of Athens, pmpakos@cslab.ece.ntua.gr, nikela@cslab.ece.ntua.gr, xalverti@cslab.ece.ntua.gr, goumas@cslab.ece.ntua.gr, nkoziris@cslab.ece.ntua.gr","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044661029","display_name":"Chloe Alverti","orcid":"https://orcid.org/0000-0002-7965-0510"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alverti Chloe","raw_affiliation_strings":["National Technical University of Athens, pmpakos@cslab.ece.ntua.gr, nikela@cslab.ece.ntua.gr, xalverti@cslab.ece.ntua.gr, goumas@cslab.ece.ntua.gr, nkoziris@cslab.ece.ntua.gr"],"affiliations":[{"raw_affiliation_string":"National Technical University of Athens, pmpakos@cslab.ece.ntua.gr, nikela@cslab.ece.ntua.gr, xalverti@cslab.ece.ntua.gr, goumas@cslab.ece.ntua.gr, nkoziris@cslab.ece.ntua.gr","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023870696","display_name":"Georgios Goumas","orcid":"https://orcid.org/0000-0001-7811-4831"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Goumas Georgios","raw_affiliation_strings":["National Technical University of Athens, pmpakos@cslab.ece.ntua.gr, nikela@cslab.ece.ntua.gr, xalverti@cslab.ece.ntua.gr, goumas@cslab.ece.ntua.gr, nkoziris@cslab.ece.ntua.gr"],"affiliations":[{"raw_affiliation_string":"National Technical University of Athens, pmpakos@cslab.ece.ntua.gr, nikela@cslab.ece.ntua.gr, xalverti@cslab.ece.ntua.gr, goumas@cslab.ece.ntua.gr, nkoziris@cslab.ece.ntua.gr","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023526161","display_name":"Nectarios Koziris","orcid":"https://orcid.org/0000-0002-4890-8427"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Koziris Nectarios","raw_affiliation_strings":["National Technical University of Athens, pmpakos@cslab.ece.ntua.gr, nikela@cslab.ece.ntua.gr, xalverti@cslab.ece.ntua.gr, goumas@cslab.ece.ntua.gr, nkoziris@cslab.ece.ntua.gr"],"affiliations":[{"raw_affiliation_string":"National Technical University of Athens, pmpakos@cslab.ece.ntua.gr, nikela@cslab.ece.ntua.gr, xalverti@cslab.ece.ntua.gr, goumas@cslab.ece.ntua.gr, nkoziris@cslab.ece.ntua.gr","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5043327478"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.6686,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.62941176,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.9807000160217285,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7642703652381897},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.7496557235717773},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7325063943862915},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.5593779683113098},{"id":"https://openalex.org/keywords/multiplication","display_name":"Multiplication (music)","score":0.5344561338424683},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.5052242875099182},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.5021827220916748},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.4145034849643707},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.20903921127319336},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10263922810554504}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7642703652381897},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.7496557235717773},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7325063943862915},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.5593779683113098},{"id":"https://openalex.org/C2780595030","wikidata":"https://www.wikidata.org/wiki/Q3860309","display_name":"Multiplication (music)","level":2,"score":0.5344561338424683},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.5052242875099182},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.5021827220916748},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.4145034849643707},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.20903921127319336},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10263922810554504},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C84114770","wikidata":"https://www.wikidata.org/wiki/Q46344","display_name":"Quantum","level":2,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.3233/apc200092","is_oa":true,"landing_page_url":"https://doi.org/10.3233/apc200092","pdf_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/APC200092","source":{"id":"https://openalex.org/S4210175178","display_name":"Advances in parallel computing","issn_l":"0927-5452","issn":["0927-5452","1879-808X"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"book series"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Advances in Parallel Computing","raw_type":"book-chapter"}],"best_oa_location":{"id":"doi:10.3233/apc200092","is_oa":true,"landing_page_url":"https://doi.org/10.3233/apc200092","pdf_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/APC200092","source":{"id":"https://openalex.org/S4210175178","display_name":"Advances in parallel computing","issn_l":"0927-5452","issn":["0927-5452","1879-808X"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"book series"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Advances in Parallel Computing","raw_type":"book-chapter"},"sustainable_development_goals":[{"display_name":"Affordable and clean energy","score":0.8999999761581421,"id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3022509528.pdf","grobid_xml":"https://content.openalex.org/works/W3022509528.grobid-xml"},"referenced_works_count":22,"referenced_works":["https://openalex.org/W1527464046","https://openalex.org/W1537260300","https://openalex.org/W1981252059","https://openalex.org/W1990832096","https://openalex.org/W2004455575","https://openalex.org/W2022219663","https://openalex.org/W2035080386","https://openalex.org/W2040412852","https://openalex.org/W2092633783","https://openalex.org/W2106111377","https://openalex.org/W2148570406","https://openalex.org/W2154991996","https://openalex.org/W2160280813","https://openalex.org/W2412759392","https://openalex.org/W2534566880","https://openalex.org/W2615755298","https://openalex.org/W2727717548","https://openalex.org/W2941763154","https://openalex.org/W6681775853","https://openalex.org/W6683758221","https://openalex.org/W6738662749","https://openalex.org/W7027593928"],"related_works":["https://openalex.org/W3099313426","https://openalex.org/W4287593139","https://openalex.org/W752783541","https://openalex.org/W1506547947","https://openalex.org/W4206811032","https://openalex.org/W2995605830","https://openalex.org/W4239424132","https://openalex.org/W2596457687","https://openalex.org/W3212757063","https://openalex.org/W2086123442"],"abstract_inverted_index":{"The":[0],"Sparse":[1],"Matrix-Vector":[2],"Multiplication":[3],"kernel":[4,127,155,170],"(SpMV)":[5],"has":[6,30],"been":[7,31],"one":[8,32],"of":[9,21,33,50,68,167,180,194],"the":[10,18,26,34,60,65,69,82,85,125,150,153,162,165,168,175,207],"most":[11,35],"popular":[12],"kernels":[13],"in":[14,183],"high-performance":[15,90],"computing,":[16],"as":[17,112],"building":[19],"block":[20],"many":[22],"iterative":[23],"solvers.":[24],"At":[25],"same":[27],"time,":[28],"it":[29],"notorious":[36],"kernels,":[37],"due":[38],"to":[39,48,74,149,160,185],"its":[40],"low":[41],"flop":[42],"per":[43,103],"byte":[44],"ratio,":[45],"which":[46],"leads":[47],"under-utilization":[49],"modern":[51,186],"processing":[52,98],"system":[53,62],"resources":[54],"and":[55,64,156,164,177,188,197],"a":[56,95,191],"huge":[57],"gap":[58],"between":[59],"peak":[61],"performance":[63,67,76,102,166,176],"observed":[66],"kernel.":[70,209],"However,":[71],"moving":[72],"forward":[73],"exascale,":[75],"by":[77],"itself":[78],"is":[79,93],"no":[80],"longer":[81],"holy":[83],"grail;":[84],"requirement":[86],"for":[87,117,134,138,152,190,206],"energy":[88,178],"efficient":[89],"computing":[91],"systems":[92],"driving":[94],"trend":[96],"towards":[97,130],"units":[99],"with":[100],"better":[101],"watt":[104],"ratios.":[105],"Following":[106],"this":[107,121],"trend,":[108],"FP-GAs":[109],"have":[110],"emerged":[111],"an":[113,131,203],"alternative,":[114],"low-power":[115],"accelerator":[116],"high-end":[118],"systems.":[119],"In":[120],"paper,":[122],"we":[123],"implement":[124],"SpMV":[126,154,169,208],"on":[128,146,171],"FPGAs,":[129],"accelerated":[132],"library":[133],"sparse":[135,195],"matrix":[136],"computations,":[137],"single-precision":[139],"floating":[140],"point":[141],"values.":[142],"Our":[143],"implementation":[144],"focuses":[145],"optimizing":[147],"access":[148],"data":[151],"applies":[157],"common":[158],"optimizations":[159],"improve":[161],"parallelism":[163],"FPGAs.":[172],"We":[173],"evaluate":[174],"efficiency":[179],"our":[181],"implementation,":[182],"comparison":[184],"CPUs":[187],"GPUs,":[189],"diverse":[192],"set":[193],"matrices":[196],"demonstrate":[198],"that":[199],"FPGAs":[200],"can":[201],"be":[202],"energy-efficient":[204],"solution":[205]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2020-05-13T00:00:00"}
