{"id":"https://openalex.org/W3150230160","doi":"https://doi.org/10.1109/dac18074.2021.9586203","title":"Scaling up HBM Efficiency of Top-K SpMV for Approximate Embedding Similarity on FPGAs","display_name":"Scaling up HBM Efficiency of Top-K SpMV for Approximate Embedding Similarity on FPGAs","publication_year":2021,"publication_date":"2021-11-08","ids":{"openalex":"https://openalex.org/W3150230160","doi":"https://doi.org/10.1109/dac18074.2021.9586203","mag":"3150230160"},"language":"en","primary_location":{"id":"doi:10.1109/dac18074.2021.9586203","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac18074.2021.9586203","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 58th ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2103.04808","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5000548939","display_name":"Alberto Parravicini","orcid":"https://orcid.org/0000-0001-8806-1665"},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Alberto Parravicini","raw_affiliation_strings":["Politecnico di Milano, DEIB, Milan, Italy","Politecnico di Milano \u2013 DEIB, Milan, Italy"],"affiliations":[{"raw_affiliation_string":"Politecnico di Milano, DEIB, Milan, Italy","institution_ids":["https://openalex.org/I93860229"]},{"raw_affiliation_string":"Politecnico di Milano \u2013 DEIB, Milan, Italy","institution_ids":["https://openalex.org/I93860229"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039003666","display_name":"Luca Giuseppe Cellamare","orcid":null},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Luca Giuseppe Cellamare","raw_affiliation_strings":["Politecnico di Milano, DEIB, Milan, Italy","Politecnico di Milano \u2013 DEIB, Milan, Italy"],"affiliations":[{"raw_affiliation_string":"Politecnico di Milano, DEIB, Milan, Italy","institution_ids":["https://openalex.org/I93860229"]},{"raw_affiliation_string":"Politecnico di Milano \u2013 DEIB, Milan, Italy","institution_ids":["https://openalex.org/I93860229"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053721069","display_name":"Marco Siracusa","orcid":"https://orcid.org/0000-0003-2782-837X"},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Marco Siracusa","raw_affiliation_strings":["Politecnico di Milano, DEIB, Milan, Italy","Politecnico di Milano \u2013 DEIB, Milan, Italy"],"affiliations":[{"raw_affiliation_string":"Politecnico di Milano, DEIB, Milan, Italy","institution_ids":["https://openalex.org/I93860229"]},{"raw_affiliation_string":"Politecnico di Milano \u2013 DEIB, Milan, Italy","institution_ids":["https://openalex.org/I93860229"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5010543929","display_name":"Marco D. Santambrogio","orcid":"https://orcid.org/0000-0002-9883-9693"},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Marco D. Santambrogio","raw_affiliation_strings":["Politecnico di Milano, DEIB, Milan, Italy","Politecnico di Milano \u2013 DEIB, Milan, Italy"],"affiliations":[{"raw_affiliation_string":"Politecnico di Milano, DEIB, Milan, Italy","institution_ids":["https://openalex.org/I93860229"]},{"raw_affiliation_string":"Politecnico di Milano \u2013 DEIB, Milan, Italy","institution_ids":["https://openalex.org/I93860229"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5000548939"],"corresponding_institution_ids":["https://openalex.org/I93860229"],"apc_list":null,"apc_paid":null,"fwci":0.6371,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.69837693,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"799","last_page":"804"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10796","display_name":"Cooperative Communication and Network Coding","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10796","display_name":"Cooperative Communication and Network Coding","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10500","display_name":"Sparse and Compressive Sensing Techniques","score":0.9925000071525574,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9855999946594238,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8165159821510315},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6817420721054077},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.6628130674362183},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.580798864364624},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.577726423740387},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5493530631065369},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.5163244605064392},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4916403889656067},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.46993547677993774},{"id":"https://openalex.org/keywords/workload","display_name":"Workload","score":0.41153693199157715},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.35053712129592896},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3043151795864105},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.11521586775779724},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.08981499075889587},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.07642862200737}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8165159821510315},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6817420721054077},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.6628130674362183},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.580798864364624},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.577726423740387},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5493530631065369},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.5163244605064392},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4916403889656067},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.46993547677993774},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.41153693199157715},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.35053712129592896},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3043151795864105},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.11521586775779724},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.08981499075889587},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.07642862200737},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1109/dac18074.2021.9586203","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac18074.2021.9586203","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 58th ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2103.04808","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2103.04808","pdf_url":"https://arxiv.org/pdf/2103.04808","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:3150230160","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/2103.04808","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"pmh:oai:re.public.polimi.it:11311/1280314","is_oa":false,"landing_page_url":"https://hdl.handle.net/11311/1280314","pdf_url":null,"source":{"id":"https://openalex.org/S4306400312","display_name":"Virtual Community of Pathological Anatomy (University of Castilla La Mancha)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79189158","host_organization_name":"University of Castilla-La Mancha","host_organization_lineage":["https://openalex.org/I79189158"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/conferenceObject"},{"id":"doi:10.48550/arxiv.2103.04808","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2103.04808","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2103.04808","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2103.04808","pdf_url":"https://arxiv.org/pdf/2103.04808","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","score":0.8100000023841858,"display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3150230160.pdf"},"referenced_works_count":29,"referenced_works":["https://openalex.org/W845977080","https://openalex.org/W1480958225","https://openalex.org/W1508286210","https://openalex.org/W1537260300","https://openalex.org/W2005876975","https://openalex.org/W2108630796","https://openalex.org/W2127271355","https://openalex.org/W2250539671","https://openalex.org/W2558367073","https://openalex.org/W2579831847","https://openalex.org/W2757662681","https://openalex.org/W2895305554","https://openalex.org/W2943422048","https://openalex.org/W2949985202","https://openalex.org/W2951489508","https://openalex.org/W2957191877","https://openalex.org/W2964337156","https://openalex.org/W2979747168","https://openalex.org/W2996522450","https://openalex.org/W3034855459","https://openalex.org/W3042997474","https://openalex.org/W3048903267","https://openalex.org/W3105114834","https://openalex.org/W3126890733","https://openalex.org/W6757123339","https://openalex.org/W6780830597","https://openalex.org/W6783450968","https://openalex.org/W6787400498","https://openalex.org/W6789083386"],"related_works":["https://openalex.org/W3213945436","https://openalex.org/W2535419693","https://openalex.org/W2885034848","https://openalex.org/W2971441423","https://openalex.org/W2725757310","https://openalex.org/W2535926538","https://openalex.org/W1981718088","https://openalex.org/W1989568755","https://openalex.org/W3011554625","https://openalex.org/W2415007423","https://openalex.org/W2162283062","https://openalex.org/W2950015285","https://openalex.org/W2908185957","https://openalex.org/W2727717548","https://openalex.org/W2782176700","https://openalex.org/W3115462165","https://openalex.org/W2182219686","https://openalex.org/W2991370896","https://openalex.org/W3087988169","https://openalex.org/W2952807630"],"abstract_inverted_index":{"Top-K":[0],"SpMV":[1],"is":[2],"a":[3,33,41,50,81,89],"key":[4],"component":[5],"of":[6],"similarity-search":[7],"on":[8,18],"sparse":[9,12],"embeddings.":[10],"This":[11],"workload":[13],"does":[14],"not":[15],"perform":[16],"well":[17],"general-purpose":[19],"NUMA":[20],"systems":[21],"that":[22,45],"employ":[23],"traditional":[24],"caching":[25],"strategies.":[26],"Instead,":[27],"modern":[28],"FPGA":[29,43],"accelerator":[30],"cards":[31],"have":[32],"few":[34],"tricks":[35],"up":[36],"their":[37],"sleeve.":[38],"We":[39],"introduce":[40],"Top-KSpMV":[42],"design":[44],"leverages":[46],"reduced":[47],"precision":[48],"and":[49,60,85],"novel":[51],"packet-wise":[52],"CSR":[53],"matrix":[54],"compression,":[55],"enabling":[56],"custom":[57],"data":[58],"layouts":[59],"delivering":[61],"bandwidth":[62],"efficiency":[63],"often":[64],"unreachable":[65],"even":[66],"in":[67],"architectures":[68],"with":[69,91,95],"higher":[70,93,97],"peak":[71],"bandwidth.":[72],"With":[73],"HBM-based":[74],"boards,":[75],"we":[76],"are":[77],"100x":[78],"faster":[79,87],"than":[80,88],"multi-threaded":[82],"CPU":[83],"implementation":[84],"2x":[86],"GPU":[90],"20%":[92],"bandwidth,":[94],"14.2x":[96],"power-efficiency.":[98]},"counts_by_year":[{"year":2021,"cited_by_count":4}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
