{"id":"https://openalex.org/W7155046096","doi":"https://doi.org/10.48550/arxiv.2604.17198","title":"Partitioning Unstructured Sparse Tensor Algebra for Load-Balanced Parallel Execution","display_name":"Partitioning Unstructured Sparse Tensor Algebra for Load-Balanced Parallel Execution","publication_year":2026,"publication_date":"2026-04-19","ids":{"openalex":"https://openalex.org/W7155046096","doi":"https://doi.org/10.48550/arxiv.2604.17198"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.17198","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.17198","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.17198","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134208038","display_name":"Atharva Chougule","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chougule, Atharva","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134100516","display_name":"Alexander J Root","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Root, Alexander J","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032530949","display_name":"Rubens Lacouture","orcid":"https://orcid.org/0009-0008-2268-0074"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lacouture, Rubens","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019401621","display_name":"Bobby Yan","orcid":"https://orcid.org/0009-0002-6792-6222"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yan, Bobby","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047980263","display_name":"Rohan Yadav","orcid":"https://orcid.org/0000-0003-0746-066X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yadav, Rohan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134139451","display_name":"Fredrik Kjolstad","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kjolstad, Fredrik","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.6252999901771545,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.6252999901771545,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12303","display_name":"Tensor decomposition and applications","score":0.1526000052690506,"subfield":{"id":"https://openalex.org/subfields/2605","display_name":"Computational Mathematics"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10481","display_name":"Computer Graphics and Visualization Techniques","score":0.04390000179409981,"subfield":{"id":"https://openalex.org/subfields/1704","display_name":"Computer Graphics and Computer-Aided Design"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/tensor","display_name":"Tensor (intrinsic definition)","score":0.6952999830245972},{"id":"https://openalex.org/keywords/tensor-algebra","display_name":"Tensor algebra","score":0.6729000210762024},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.5396999716758728},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5123000144958496},{"id":"https://openalex.org/keywords/linear-algebra","display_name":"Linear algebra","score":0.45179998874664307},{"id":"https://openalex.org/keywords/parallel-algorithm","display_name":"Parallel algorithm","score":0.41749998927116394},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4115000069141388},{"id":"https://openalex.org/keywords/algebra-over-a-field","display_name":"Algebra over a field","score":0.3700000047683716}],"concepts":[{"id":"https://openalex.org/C155281189","wikidata":"https://www.wikidata.org/wiki/Q3518150","display_name":"Tensor (intrinsic definition)","level":2,"score":0.6952999830245972},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6926000118255615},{"id":"https://openalex.org/C1680195","wikidata":"https://www.wikidata.org/wiki/Q2296021","display_name":"Tensor algebra","level":5,"score":0.6729000210762024},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.5396999716758728},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5346999764442444},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5123000144958496},{"id":"https://openalex.org/C139352143","wikidata":"https://www.wikidata.org/wiki/Q82571","display_name":"Linear algebra","level":2,"score":0.45179998874664307},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.4284000098705292},{"id":"https://openalex.org/C120373497","wikidata":"https://www.wikidata.org/wiki/Q1087987","display_name":"Parallel algorithm","level":2,"score":0.41749998927116394},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4115000069141388},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.38260000944137573},{"id":"https://openalex.org/C136119220","wikidata":"https://www.wikidata.org/wiki/Q1000660","display_name":"Algebra over a field","level":2,"score":0.3700000047683716},{"id":"https://openalex.org/C124066611","wikidata":"https://www.wikidata.org/wiki/Q28684319","display_name":"Sparse approximation","level":2,"score":0.34220001101493835},{"id":"https://openalex.org/C95916125","wikidata":"https://www.wikidata.org/wiki/Q840540","display_name":"Relational algebra","level":3,"score":0.3206999897956848},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.31189998984336853},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.28780001401901245},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.2858000099658966},{"id":"https://openalex.org/C106515295","wikidata":"https://www.wikidata.org/wiki/Q26806595","display_name":"Parallel processing","level":2,"score":0.27959999442100525},{"id":"https://openalex.org/C163834973","wikidata":"https://www.wikidata.org/wiki/Q2004891","display_name":"Numerical linear algebra","level":3,"score":0.27320000529289246},{"id":"https://openalex.org/C60321788","wikidata":"https://www.wikidata.org/wiki/Q1197190","display_name":"Multilinear algebra","level":5,"score":0.2727999985218048},{"id":"https://openalex.org/C51255310","wikidata":"https://www.wikidata.org/wiki/Q1163016","display_name":"Tensor product","level":2,"score":0.26409998536109924},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.2563000023365021},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.25380000472068787},{"id":"https://openalex.org/C2989134064","wikidata":"https://www.wikidata.org/wiki/Q288510","display_name":"Execution time","level":2,"score":0.2522999942302704}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.17198","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.17198","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.17198","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.17198","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Sparse":[0],"tensor":[1,35,68,77,123],"algebra":[2,36,69,78],"is":[3,92],"challenging":[4],"to":[5,9,48,54,72],"efficiently":[6],"parallelize":[7],"due":[8],"the":[10,22,30],"irregular,":[11],"data-dependent,":[12],"and":[13,53,84,105,111,116],"potentially":[14],"skewed":[15],"structure":[16],"of":[17,32,51,109,114,133],"sparse":[18,34,57,67,76,122],"computation.":[19],"We":[20,60,86],"propose":[21],"first":[23],"partitioning":[24],"algorithm":[25,43,63],"that":[26,80,88],"provably":[27],"load":[28],"balances":[29],"computation":[31],"any":[33,49],"expression":[37],"across":[38],"parallel":[39,45,75],"execution":[40],"units.":[41],"Our":[42],"generalizes":[44],"merging":[46],"algorithms":[47,127],"number":[50],"operands,":[52],"multi-dimensional,":[55],"hierarchical":[56],"data":[58],"structures.":[59],"implement":[61],"our":[62,89],"within":[64],"an":[65],"existing":[66],"compilation":[70],"framework":[71],"automatically":[73],"generate":[74],"kernels":[79],"target":[81],"multi-core":[82],"CPUs":[83],"GPUs.":[85],"show":[87],"generated":[90],"code":[91],"competitive":[93],"with":[94],"hand-implemented":[95],"parallelization":[96],"strategies":[97,120],"used":[98],"by":[99],"vendor":[100],"libraries":[101],"like":[102],"Intel":[103],"MKL":[104],"NVIDIA":[106],"cuSPARSE":[107],"(geo-means":[108,113,132],"$0.73$--$3.4\\times$)":[110],"\\textsc{Taco}":[112],"$1.0$--$2.4\\times$),":[115],"significantly":[117],"outperforms":[118],"general-purpose":[119],"for":[121],"expressions":[124],"where":[125],"specialized":[126],"have":[128],"not":[129],"been":[130],"developed":[131],"$2.0$--$6.4\\times$).":[134]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-22T00:00:00"}
