{"id":"https://openalex.org/W2165504907","doi":"https://doi.org/10.1177/1094342011414748","title":"Exploiting dense substructures for fast sparse matrix vector multiplication","display_name":"Exploiting dense substructures for fast sparse matrix vector multiplication","publication_year":2011,"publication_date":"2011-08-01","ids":{"openalex":"https://openalex.org/W2165504907","doi":"https://doi.org/10.1177/1094342011414748","mag":"2165504907"},"language":"en","primary_location":{"id":"doi:10.1177/1094342011414748","is_oa":false,"landing_page_url":"https://doi.org/10.1177/1094342011414748","pdf_url":null,"source":{"id":"https://openalex.org/S60606485","display_name":"The International Journal of High Performance Computing Applications","issn_l":"1094-3420","issn":["1094-3420","1741-2846"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The International Journal of High Performance Computing Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5057428462","display_name":"Manu Shantharam","orcid":null},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Manu Shantharam","raw_affiliation_strings":["Department of Computer Science and Engineering The Pennsylvania State University University Park, PA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering The Pennsylvania State University University Park, PA, USA","institution_ids":["https://openalex.org/I130769515"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075247915","display_name":"Anirban Chatterjee","orcid":"https://orcid.org/0000-0003-2513-6433"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anirban Chatterjee","raw_affiliation_strings":["Department of Computer Science and Engineering The Pennsylvania State University University Park, PA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering The Pennsylvania State University University Park, PA, USA","institution_ids":["https://openalex.org/I130769515"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102868534","display_name":"Padma Raghavan","orcid":"https://orcid.org/0009-0002-6785-2112"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Padma Raghavan","raw_affiliation_strings":["Department of Computer Science and Engineering The Pennsylvania State University University Park, PA, USA,"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering The Pennsylvania State University University Park, PA, USA,","institution_ids":["https://openalex.org/I130769515"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5102868534"],"corresponding_institution_ids":["https://openalex.org/I130769515"],"apc_list":null,"apc_paid":null,"fwci":0.5037,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.6914193,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"25","issue":"3","first_page":"328","last_page":"341"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7058634161949158},{"id":"https://openalex.org/keywords/adjacency-matrix","display_name":"Adjacency matrix","score":0.6834908723831177},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.6580456495285034},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6285514831542969},{"id":"https://openalex.org/keywords/multiplication","display_name":"Multiplication (music)","score":0.5626128315925598},{"id":"https://openalex.org/keywords/matrix","display_name":"Matrix (chemical analysis)","score":0.560505211353302},{"id":"https://openalex.org/keywords/row-and-column-spaces","display_name":"Row and column spaces","score":0.5403220653533936},{"id":"https://openalex.org/keywords/matrix-representation","display_name":"Matrix representation","score":0.514358639717102},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.49755170941352844},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.4927690327167511},{"id":"https://openalex.org/keywords/row","display_name":"Row","score":0.4911908805370331},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.48545461893081665},{"id":"https://openalex.org/keywords/sparse-approximation","display_name":"Sparse approximation","score":0.47844198346138},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.42619889974594116},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.34021949768066406},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.30178308486938477},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.21567222476005554},{"id":"https://openalex.org/keywords/group","display_name":"Group (periodic table)","score":0.19105616211891174},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.12701600790023804}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7058634161949158},{"id":"https://openalex.org/C180356752","wikidata":"https://www.wikidata.org/wiki/Q727035","display_name":"Adjacency matrix","level":3,"score":0.6834908723831177},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.6580456495285034},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6285514831542969},{"id":"https://openalex.org/C2780595030","wikidata":"https://www.wikidata.org/wiki/Q3860309","display_name":"Multiplication (music)","level":2,"score":0.5626128315925598},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.560505211353302},{"id":"https://openalex.org/C104140500","wikidata":"https://www.wikidata.org/wiki/Q2088159","display_name":"Row and column spaces","level":3,"score":0.5403220653533936},{"id":"https://openalex.org/C103275481","wikidata":"https://www.wikidata.org/wiki/Q6787889","display_name":"Matrix representation","level":3,"score":0.514358639717102},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.49755170941352844},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.4927690327167511},{"id":"https://openalex.org/C135598885","wikidata":"https://www.wikidata.org/wiki/Q1366302","display_name":"Row","level":2,"score":0.4911908805370331},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.48545461893081665},{"id":"https://openalex.org/C124066611","wikidata":"https://www.wikidata.org/wiki/Q28684319","display_name":"Sparse approximation","level":2,"score":0.47844198346138},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.42619889974594116},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.34021949768066406},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.30178308486938477},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.21567222476005554},{"id":"https://openalex.org/C2781311116","wikidata":"https://www.wikidata.org/wiki/Q83306","display_name":"Group (periodic table)","level":2,"score":0.19105616211891174},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.12701600790023804},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0},{"id":"https://openalex.org/C84114770","wikidata":"https://www.wikidata.org/wiki/Q46344","display_name":"Quantum","level":2,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1177/1094342011414748","is_oa":false,"landing_page_url":"https://doi.org/10.1177/1094342011414748","pdf_url":null,"source":{"id":"https://openalex.org/S60606485","display_name":"The International Journal of High Performance Computing Applications","issn_l":"1094-3420","issn":["1094-3420","1741-2846"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The International Journal of High Performance Computing Applications","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320306113","display_name":"U.S. Department of State","ror":"https://ror.org/03vvynj75"},{"id":"https://openalex.org/F4320310419","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W57989469","https://openalex.org/W83535271","https://openalex.org/W147926632","https://openalex.org/W1486620480","https://openalex.org/W1492326914","https://openalex.org/W1518969538","https://openalex.org/W1585892185","https://openalex.org/W1884140786","https://openalex.org/W1965351873","https://openalex.org/W1965551736","https://openalex.org/W1977556410","https://openalex.org/W1980714869","https://openalex.org/W1981885118","https://openalex.org/W1987393840","https://openalex.org/W1989689661","https://openalex.org/W1990832096","https://openalex.org/W1993274656","https://openalex.org/W2035080386","https://openalex.org/W2072806558","https://openalex.org/W2094333709","https://openalex.org/W2095420020","https://openalex.org/W2096559782","https://openalex.org/W2098602979","https://openalex.org/W2098841537","https://openalex.org/W2099625934","https://openalex.org/W2114977680","https://openalex.org/W2130289795","https://openalex.org/W2142496304","https://openalex.org/W2145194992","https://openalex.org/W2146530035","https://openalex.org/W2164661813","https://openalex.org/W2171404670","https://openalex.org/W2186799393","https://openalex.org/W2906604232","https://openalex.org/W3204708751","https://openalex.org/W4210347790","https://openalex.org/W4242695473","https://openalex.org/W4292166829"],"related_works":["https://openalex.org/W2123708549","https://openalex.org/W4200089575","https://openalex.org/W2042960494","https://openalex.org/W1966476169","https://openalex.org/W2040556424","https://openalex.org/W2013693725","https://openalex.org/W2139382975","https://openalex.org/W2542063421","https://openalex.org/W1540387851","https://openalex.org/W2165504907"],"abstract_inverted_index":{"The":[0],"execution":[1],"time":[2,12],"of":[3,31,70,108,115,127,207,277],"many":[4,45],"scientific":[5],"computing":[6],"applications":[7],"is":[8],"dominated":[9],"by":[10,35,90,99,186,204],"the":[11,29,37,62,71,88,109,113,123,141,151,156,176,190,214,225,231,235,246,253,256,275],"spent":[13],"in":[14,44,66,95,138,155],"performing":[15,248],"sparse":[16,46,72,193,288],"matrix":[17,73,89,224,254,289],"vector":[18],"multiplication":[19],"(SMV;":[20],"y":[21],"\u2190":[22],"A":[23],"\u00b7":[24],"x).":[25],"We":[26,158,169,258],"consider":[27],"improving":[28],"performance":[30,183,201,237],"SMV":[32,119,147,283],"on":[33,162,175,184,202,252],"multicores":[34],"exploiting":[36],"dense":[38,85,153],"substructures":[39,154],"that":[40,149,171,245,265,285],"are":[41,211],"inherently":[42],"present":[43],"matrices":[47,174],"derived":[48],"from":[49],"partial":[50],"differential":[51],"equation":[52],"models.":[53],"First,":[54],"we":[55,82,111,131,220],"identify":[56,83],"indistinguishable":[57],"vertices,":[58],"i.e.,":[59],"vertices":[60],"with":[61,189],"same":[63],"adjacency":[64],"structure,":[65],"a":[67,79,101],"graph":[68],"representation":[69,107],"(A)":[74],"and":[75,93,165,230,255,291],"group":[76],"them":[77],"into":[78],"supernode.":[80,97],"Next,":[81],"effectively":[84,152],"blocks":[86],"within":[87],"grouping":[91],"rows":[92],"columns":[94],"each":[96,223],"Finally,":[98],"using":[100],"suitable":[102],"data":[103],"structure":[104,126],"for":[105,172,213,222,269,279,294],"this":[106],"matrix,":[110],"reduce":[112],"number":[114],"load":[116],"operations":[117],"during":[118],"while":[120],"exactly":[121],"preserving":[122],"original":[124],"sparsity":[125],"A.":[128],"In":[129],"addition,":[130],"use":[132],"ordering":[133],"techniques":[134],"to":[135,140,144,240,274],"enhance":[136],"locality":[137],"accesses":[139],"vector,":[142],"x,":[143],"yield":[145],"an":[146,261,280],"kernel":[148,284],"exploits":[150],"matrix.":[157],"evaluate":[159],"our":[160,180,208,228],"scheme":[161,195,249],"Intel":[163,177],"Nehalem":[164,178],"AMD":[166,215],"Shanghai":[167],"processors.":[168],"observe":[170],"larger":[173],"processor,":[179],"method":[181,229,270],"improves":[182,200],"average":[185,203,236],"37.35%":[187],"compared":[188],"traditional":[191],"compressed":[192,198,233],"row":[194],"(a":[196],"blocked":[197,232],"form":[199],"30.27%).":[205],"Benefits":[206],"new":[209],"format":[210],"similar":[212],"processor.":[216],"More":[217],"importantly,":[218],"if":[219],"pick":[221],"best":[226,247],"among":[227],"scheme,":[234],"improvements":[238],"increase":[239],"40.85%.":[241],"Additional":[242],"results":[243],"indicate":[244],"varies":[250],"depending":[251],"system.":[257],"therefore":[259],"propose":[260],"effective":[262],"density":[263],"measure":[264],"could":[266],"be":[267],"used":[268],"selection,":[271],"thus":[272],"adding":[273],"variety":[276],"options":[278],"auto-tuned":[281],"optimized":[282],"can":[286],"exploit":[287],"properties":[290],"hardware":[292],"attributes":[293],"high":[295],"performance.":[296]},"counts_by_year":[{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":2},{"year":2015,"cited_by_count":2},{"year":2014,"cited_by_count":1},{"year":2012,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
