{"id":"https://openalex.org/W4294433898","doi":"https://doi.org/10.1145/3545008.3545087","title":"Tesseract: Parallelize the Tensor Parallelism Efficiently","display_name":"Tesseract: Parallelize the Tensor Parallelism Efficiently","publication_year":2022,"publication_date":"2022-08-29","ids":{"openalex":"https://openalex.org/W4294433898","doi":"https://doi.org/10.1145/3545008.3545087"},"language":"en","primary_location":{"id":"doi:10.1145/3545008.3545087","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3545008.3545087","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 51st International Conference on Parallel Processing","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2105.14500","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102708793","display_name":"Boxiang Wang","orcid":"https://orcid.org/0000-0003-3622-6020"},"institutions":[{"id":"https://openalex.org/I168639165","display_name":"Singapore Institute of Technology","ror":"https://ror.org/01v2c2791","country_code":"SG","type":"education","lineage":["https://openalex.org/I168639165"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Boxiang Wang","raw_affiliation_strings":["HPC-AI Technology Inc., Singapore"],"affiliations":[{"raw_affiliation_string":"HPC-AI Technology Inc., Singapore","institution_ids":["https://openalex.org/I168639165"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068927255","display_name":"Qifan Xu","orcid":"https://orcid.org/0000-0002-7155-9552"},"institutions":[{"id":"https://openalex.org/I161318765","display_name":"University of California, Los Angeles","ror":"https://ror.org/046rm7j60","country_code":"US","type":"education","lineage":["https://openalex.org/I161318765"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Qifan Xu","raw_affiliation_strings":["University of California, Los Angeles, United States of America"],"affiliations":[{"raw_affiliation_string":"University of California, Los Angeles, United States of America","institution_ids":["https://openalex.org/I161318765"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011091722","display_name":"Zhengda Bian","orcid":"https://orcid.org/0000-0002-1906-1781"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhengda Bian","raw_affiliation_strings":["HPC-AI Technology Inc., China"],"affiliations":[{"raw_affiliation_string":"HPC-AI Technology Inc., China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100658705","display_name":"Yang You","orcid":"https://orcid.org/0000-0003-2816-4384"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Yang You","raw_affiliation_strings":["School of Computing, National University of Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"School of Computing, National University of Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5102708793"],"corresponding_institution_ids":["https://openalex.org/I168639165"],"apc_list":null,"apc_paid":null,"fwci":2.7513,"has_fulltext":false,"cited_by_count":29,"citation_normalized_percentile":{"value":0.91792249,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"11"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9958000183105469,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9864000082015991,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.85271155834198},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7564084529876709},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.7054153680801392},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6945343613624573},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.5605500936508179},{"id":"https://openalex.org/keywords/data-parallelism","display_name":"Data parallelism","score":0.5411614179611206},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.5241637229919434},{"id":"https://openalex.org/keywords/tensor","display_name":"Tensor (intrinsic definition)","score":0.4897298812866211},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.36762869358062744},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10523721575737}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.85271155834198},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7564084529876709},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.7054153680801392},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6945343613624573},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.5605500936508179},{"id":"https://openalex.org/C61483411","wikidata":"https://www.wikidata.org/wiki/Q3124522","display_name":"Data parallelism","level":3,"score":0.5411614179611206},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.5241637229919434},{"id":"https://openalex.org/C155281189","wikidata":"https://www.wikidata.org/wiki/Q3518150","display_name":"Tensor (intrinsic definition)","level":2,"score":0.4897298812866211},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.36762869358062744},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10523721575737},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3545008.3545087","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3545008.3545087","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 51st International Conference on Parallel Processing","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2105.14500","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2105.14500","pdf_url":"https://arxiv.org/pdf/2105.14500","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2105.14500","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2105.14500","pdf_url":"https://arxiv.org/pdf/2105.14500","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W201315547","https://openalex.org/W1598993252","https://openalex.org/W1813030236","https://openalex.org/W2056999868","https://openalex.org/W2117539524","https://openalex.org/W2338908902","https://openalex.org/W2622263826","https://openalex.org/W2757910899","https://openalex.org/W2763421725","https://openalex.org/W2769856846","https://openalex.org/W2884711234","https://openalex.org/W2892181857","https://openalex.org/W2901541570","https://openalex.org/W2925006810","https://openalex.org/W2926655273","https://openalex.org/W2962747323","https://openalex.org/W2963341956","https://openalex.org/W2969388332","https://openalex.org/W2973727699","https://openalex.org/W2974008169","https://openalex.org/W2991040477","https://openalex.org/W2995435108","https://openalex.org/W3094502228","https://openalex.org/W3159351344","https://openalex.org/W3205803342","https://openalex.org/W4205968302","https://openalex.org/W4292779060","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W1966837078","https://openalex.org/W2950520577","https://openalex.org/W1501159154","https://openalex.org/W1554644772","https://openalex.org/W2003935582","https://openalex.org/W2494130044","https://openalex.org/W3170887803","https://openalex.org/W2963831937","https://openalex.org/W74409296","https://openalex.org/W3209384898"],"abstract_inverted_index":{"Together":[0],"with":[1,104,176,219],"the":[2,54,75,117,125,134,146,162],"improvements":[3],"in":[4,169],"state-of-the-art":[5],"accuracies":[6],"of":[7,137,148,171,187],"various":[8],"tasks,":[9],"deep":[10,216],"learning":[11,217],"models":[12,26,37,218],"are":[13],"getting":[14],"significantly":[15],"larger.":[16],"However,":[17,77],"it":[18,32,48],"is":[19,49],"extremely":[20],"difficult":[21],"to":[22,34,52,69,72,152,160,198,213],"implement":[23,214],"these":[24,78,95],"large":[25,36,58,215],"because":[27],"limited":[28,220],"GPU":[29,41,45,221],"memory":[30,118,135],"makes":[31],"impossible":[33],"fit":[35],"into":[38,128],"a":[39,44,65,81,86,105,185,207],"single":[40],"or":[42],"even":[43],"server.":[46],"Besides,":[47],"highly":[50,100],"necessary":[51],"reduce":[53,161],"training":[55],"time":[56],"for":[57,120],"models.":[59],"Previous":[60],"methods":[61,79],"like":[62],"Megatron-LM":[63],"implemented":[64],"1-Dimensional":[66],"distributed":[67],"method":[68],"use":[70],"GPUs":[71],"speed":[73],"up":[74],"training.":[76],"have":[80],"high":[82],"communication":[83,113,163],"overhead":[84,114],"and":[85,115,155,173,192,210],"low":[87],"scaling":[88,181],"efficiency":[89,110],"on":[90,165],"large-scale":[91],"clusters.":[92],"To":[93],"solve":[94],"problems,":[96],"we":[97,205],"propose":[98],"Tesseract,":[99,204],"scalable":[101,211],"tensor":[102,129,138,149],"parallelism":[103],"novel":[106,126],"design.":[107],"It":[108],"increases":[109,133,145],"by":[111],"reducing":[112],"lowers":[116],"required":[119],"each":[121,166],"GPU.":[122],"By":[123,202],"introducing":[124,203],"dimension":[127,143],"parallelism,":[130],"Tesseract":[131,158,183],"greatly":[132],"capacity":[136],"parallelism.":[139,150],"Concretely,":[140],"this":[141],"new":[142],"furthermore":[144],"degree":[147],"Compared":[151],"previous":[153],"1-D":[154],"2-D":[156],"methods,":[157,200],"manages":[159],"cost":[164],"layer,":[167],"resulting":[168],"speedups":[170],"1.38x":[172],"1.53x":[174],"respectively":[175],"strong":[177],"scaling.":[178],"In":[179],"weak":[180],"experiments,":[182],"achieves":[184],"maximum":[186],"4.0/1.7":[188],"times":[189,194],"inference":[190],"speedup":[191],"3.4/1.7":[193],"throughput":[195],"improvement":[196],"compared":[197],"1-D/2-D":[199],"respectively.":[201],"offer":[206],"more":[208],"efficient":[209],"way":[212],"resources.":[222]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":17},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":2}],"updated_date":"2026-03-27T14:29:43.386196","created_date":"2025-10-10T00:00:00"}
