{"id":"https://openalex.org/W4312538816","doi":"https://doi.org/10.1109/iscas48785.2022.9937893","title":"A Mixed Precision, Multi-GPU Design for Large-scale Top-K Sparse Eigenproblems","display_name":"A Mixed Precision, Multi-GPU Design for Large-scale Top-K Sparse Eigenproblems","publication_year":2022,"publication_date":"2022-05-28","ids":{"openalex":"https://openalex.org/W4312538816","doi":"https://doi.org/10.1109/iscas48785.2022.9937893"},"language":"en","primary_location":{"id":"doi:10.1109/iscas48785.2022.9937893","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscas48785.2022.9937893","pdf_url":null,"source":{"id":"https://openalex.org/S4363604393","display_name":"2022 IEEE International Symposium on Circuits and Systems (ISCAS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Symposium on Circuits and Systems (ISCAS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041923708","display_name":"Francesco Sgherzi","orcid":"https://orcid.org/0000-0001-8319-7491"},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Francesco Sgherzi","raw_affiliation_strings":["Politecnico di Milano,DEIB,Milan,Italy","Politecnico di Milano, DEIB, Milan, Italy"],"affiliations":[{"raw_affiliation_string":"Politecnico di Milano,DEIB,Milan,Italy","institution_ids":["https://openalex.org/I93860229"]},{"raw_affiliation_string":"Politecnico di Milano, DEIB, Milan, Italy","institution_ids":["https://openalex.org/I93860229"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000548939","display_name":"Alberto Parravicini","orcid":"https://orcid.org/0000-0001-8806-1665"},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Alberto Parravicini","raw_affiliation_strings":["Politecnico di Milano,DEIB,Milan,Italy","Politecnico di Milano, DEIB, Milan, Italy"],"affiliations":[{"raw_affiliation_string":"Politecnico di Milano,DEIB,Milan,Italy","institution_ids":["https://openalex.org/I93860229"]},{"raw_affiliation_string":"Politecnico di Milano, DEIB, Milan, Italy","institution_ids":["https://openalex.org/I93860229"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5010543929","display_name":"Marco D. Santambrogio","orcid":"https://orcid.org/0000-0002-9883-9693"},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Marco D. Santambrogio","raw_affiliation_strings":["Politecnico di Milano,DEIB,Milan,Italy","Politecnico di Milano, DEIB, Milan, Italy"],"affiliations":[{"raw_affiliation_string":"Politecnico di Milano,DEIB,Milan,Italy","institution_ids":["https://openalex.org/I93860229"]},{"raw_affiliation_string":"Politecnico di Milano, DEIB, Milan, Italy","institution_ids":["https://openalex.org/I93860229"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5041923708"],"corresponding_institution_ids":["https://openalex.org/I93860229"],"apc_list":null,"apc_paid":null,"fwci":1.5581,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.83745875,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1259","last_page":"1263"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10792","display_name":"Matrix Theory and Algorithms","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10792","display_name":"Matrix Theory and Algorithms","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/double-precision-floating-point-format","display_name":"Double-precision floating-point format","score":0.7936092615127563},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7632877230644226},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6725497245788574},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.6331119537353516},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6039316058158875},{"id":"https://openalex.org/keywords/single-precision-floating-point-format","display_name":"Single-precision floating-point format","score":0.5278639197349548},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.5257088541984558},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.5136448740959167},{"id":"https://openalex.org/keywords/arbitrary-precision-arithmetic","display_name":"Arbitrary-precision arithmetic","score":0.5038112998008728},{"id":"https://openalex.org/keywords/floating-point","display_name":"Floating point","score":0.4940667450428009},{"id":"https://openalex.org/keywords/eigenvalues-and-eigenvectors","display_name":"Eigenvalues and eigenvectors","score":0.4827112853527069},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.47415441274642944},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.46589481830596924},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.43475961685180664},{"id":"https://openalex.org/keywords/partition","display_name":"Partition (number theory)","score":0.41826778650283813},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.2018032670021057},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.16555795073509216},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.10630163550376892}],"concepts":[{"id":"https://openalex.org/C35912277","wikidata":"https://www.wikidata.org/wiki/Q1243369","display_name":"Double-precision floating-point format","level":3,"score":0.7936092615127563},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7632877230644226},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6725497245788574},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.6331119537353516},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6039316058158875},{"id":"https://openalex.org/C133095886","wikidata":"https://www.wikidata.org/wiki/Q1307173","display_name":"Single-precision floating-point format","level":3,"score":0.5278639197349548},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.5257088541984558},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.5136448740959167},{"id":"https://openalex.org/C83581934","wikidata":"https://www.wikidata.org/wiki/Q527381","display_name":"Arbitrary-precision arithmetic","level":2,"score":0.5038112998008728},{"id":"https://openalex.org/C84211073","wikidata":"https://www.wikidata.org/wiki/Q117879","display_name":"Floating point","level":2,"score":0.4940667450428009},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.4827112853527069},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.47415441274642944},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.46589481830596924},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.43475961685180664},{"id":"https://openalex.org/C42812","wikidata":"https://www.wikidata.org/wiki/Q1082910","display_name":"Partition (number theory)","level":2,"score":0.41826778650283813},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.2018032670021057},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.16555795073509216},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.10630163550376892},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iscas48785.2022.9937893","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscas48785.2022.9937893","pdf_url":null,"source":{"id":"https://openalex.org/S4363604393","display_name":"2022 IEEE International Symposium on Circuits and Systems (ISCAS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Symposium on Circuits and Systems (ISCAS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1506690472","https://openalex.org/W1593940260","https://openalex.org/W1841592590","https://openalex.org/W1965127298","https://openalex.org/W2035080386","https://openalex.org/W2046725790","https://openalex.org/W2094269821","https://openalex.org/W2117686912","https://openalex.org/W2119158865","https://openalex.org/W2130748128","https://openalex.org/W2145541974","https://openalex.org/W2155967869","https://openalex.org/W2165874743","https://openalex.org/W2760120180","https://openalex.org/W2766597226","https://openalex.org/W2767023738","https://openalex.org/W2926767350","https://openalex.org/W3101708369","https://openalex.org/W3126890733","https://openalex.org/W3170921841","https://openalex.org/W3174230151","https://openalex.org/W3201351024","https://openalex.org/W4300171661","https://openalex.org/W6638783484","https://openalex.org/W6684578312","https://openalex.org/W6783450968"],"related_works":["https://openalex.org/W2116803521","https://openalex.org/W2773283032","https://openalex.org/W3150959508","https://openalex.org/W3150370983","https://openalex.org/W2239119680","https://openalex.org/W1564887326","https://openalex.org/W1571090276","https://openalex.org/W2185760795","https://openalex.org/W2012407419","https://openalex.org/W2930605373"],"abstract_inverted_index":{"Graph":[0],"analytics":[1],"techniques":[2],"based":[3],"on":[4,96],"spectral":[5],"methods":[6],"process":[7,71],"extremely":[8],"large":[9],"sparse":[10,26,48],"matrices":[11,52],"with":[12],"millions":[13],"or":[14],"even":[15],"billions":[16],"of":[17,30],"non-zero":[18],"values.":[19],"Behind":[20],"these":[21],"algorithms":[22],"lies":[23],"the":[24,28,31,46,66,90],"Top-K":[25,47],"eigenproblem,":[27],"computation":[29,67],"largest":[32],"eigenvalues":[33],"and":[34,74,77,100,123],"their":[35],"associated":[36],"eigenvectors.":[37],"In":[38],"this":[39],"work,":[40],"we":[41,85],"leverage":[42],"GPUs":[43],"to":[44,50],"scale":[45],"eigenproblem":[49],"bigger":[51],"than":[53,89,102,128],"previously":[54],"achieved":[55],"while":[56],"also":[57,109],"providing":[58],"state-of-the-art":[59],"execution":[60,78,116],"times.":[61],"We":[62,108],"can":[63],"transparently":[64],"partition":[65],"across":[68],"multiple":[69],"GPUs,":[70],"out-of-core":[72],"matrices,":[73],"tune":[75],"precision":[76],"time":[79,117],"using":[80],"mixed-precision":[81,112],"floating-point":[82,113,130],"arithmetic.":[83,131],"Overall,":[84],"are":[86],"67\u00d7":[87],"faster":[88],"highly":[91],"optimized":[92],"ARPACK":[93],"library":[94],"running":[95],"a":[97,103],"104-thread":[98],"CPU":[99],"1.9\u00d7":[101],"recent":[104],"FPGA":[105],"hardware":[106],"design.":[107],"determine":[110],"how":[111],"arithmetic":[114],"improves":[115],"by":[118],"50":[119],"%":[120],"over":[121],"double-precision,":[122],"is":[124],"12\u00d7":[125],"more":[126],"accurate":[127],"single-precision":[129]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
