{"id":"https://openalex.org/W4394998995","doi":"https://doi.org/10.1145/3620665.3640410","title":"T3: Transparent Tracking &amp; Triggering for Fine-grained Overlap of Compute &amp; Collectives","display_name":"T3: Transparent Tracking &amp; Triggering for Fine-grained Overlap of Compute &amp; Collectives","publication_year":2024,"publication_date":"2024-04-22","ids":{"openalex":"https://openalex.org/W4394998995","doi":"https://doi.org/10.1145/3620665.3640410"},"language":"en","primary_location":{"id":"doi:10.1145/3620665.3640410","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3620665.3640410","pdf_url":null,"source":null,"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3620665.3640410","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5015196366","display_name":"Suchita Pati","orcid":"https://orcid.org/0009-0008-1083-1146"},"institutions":[{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]},{"id":"https://openalex.org/I4210137977","display_name":"Advanced Micro Devices (United States)","ror":"https://ror.org/04kd6c783","country_code":"US","type":"company","lineage":["https://openalex.org/I4210137977"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Suchita Pati","raw_affiliation_strings":["Computer Sciences Department, University of Wisconsin-Madison, Madison, Wisconsin, USA","Research and Advanced Development (RAD), Advanced Micro Devices, Inc, Santa Clara, California, USA","Computer Sciences Department, University of Wisconsin-Madison, Madison, Wisconsin, USA Research and Advanced Development (RAD), Advanced Micro Devices, Inc, Santa Clara, California, USA"],"affiliations":[{"raw_affiliation_string":"Computer Sciences Department, University of Wisconsin-Madison, Madison, Wisconsin, USA","institution_ids":["https://openalex.org/I135310074"]},{"raw_affiliation_string":"Research and Advanced Development (RAD), Advanced Micro Devices, Inc, Santa Clara, California, USA","institution_ids":["https://openalex.org/I4210137977"]},{"raw_affiliation_string":"Computer Sciences Department, University of Wisconsin-Madison, Madison, Wisconsin, USA Research and Advanced Development (RAD), Advanced Micro Devices, Inc, Santa Clara, California, USA","institution_ids":["https://openalex.org/I4210137977","https://openalex.org/I135310074"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043935249","display_name":"Shaizeen Aga","orcid":"https://orcid.org/0000-0001-9552-0508"},"institutions":[{"id":"https://openalex.org/I4210137977","display_name":"Advanced Micro Devices (United States)","ror":"https://ror.org/04kd6c783","country_code":"US","type":"company","lineage":["https://openalex.org/I4210137977"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shaizeen Aga","raw_affiliation_strings":["Research and Advanced Development (RAD), Advanced Micro Devices, Inc, Santa Clara, California, United States"],"affiliations":[{"raw_affiliation_string":"Research and Advanced Development (RAD), Advanced Micro Devices, Inc, Santa Clara, California, United States","institution_ids":["https://openalex.org/I4210137977"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047769920","display_name":"Mahzabeen Islam","orcid":"https://orcid.org/0000-0001-9980-9720"},"institutions":[{"id":"https://openalex.org/I4210137977","display_name":"Advanced Micro Devices (United States)","ror":"https://ror.org/04kd6c783","country_code":"US","type":"company","lineage":["https://openalex.org/I4210137977"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mahzabeen Islam","raw_affiliation_strings":["Research and Advanced Development (RAD), Advanced Micro Devices, Inc, Austin, Texas, United States"],"affiliations":[{"raw_affiliation_string":"Research and Advanced Development (RAD), Advanced Micro Devices, Inc, Austin, Texas, United States","institution_ids":["https://openalex.org/I4210137977"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060736283","display_name":"Nuwan Jayasena","orcid":"https://orcid.org/0009-0005-2973-9479"},"institutions":[{"id":"https://openalex.org/I4210137977","display_name":"Advanced Micro Devices (United States)","ror":"https://ror.org/04kd6c783","country_code":"US","type":"company","lineage":["https://openalex.org/I4210137977"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nuwan Jayasena","raw_affiliation_strings":["Research and Advanced Development (RAD), Advanced Micro Devices, Inc, Santa Clara, California, United States"],"affiliations":[{"raw_affiliation_string":"Research and Advanced Development (RAD), Advanced Micro Devices, Inc, Santa Clara, California, United States","institution_ids":["https://openalex.org/I4210137977"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5047054160","display_name":"Matthew D. Sinclair","orcid":"https://orcid.org/0000-0003-0189-7895"},"institutions":[{"id":"https://openalex.org/I4210137977","display_name":"Advanced Micro Devices (United States)","ror":"https://ror.org/04kd6c783","country_code":"US","type":"company","lineage":["https://openalex.org/I4210137977"]},{"id":"https://openalex.org/I135310074","display_name":"University of Wisconsin\u2013Madison","ror":"https://ror.org/01y2jtd41","country_code":"US","type":"education","lineage":["https://openalex.org/I135310074"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Matthew D. Sinclair","raw_affiliation_strings":["Computer Sciences Department, University of Wisconsin-Madison, Madison, Wisconsin, United States","Research and Advanced Development (RAD), Advanced Micro Devices, Inc, Austin, Texas, USA","Computer Sciences Department, University of Wisconsin-Madison, Madison, Wisconsin, United States Research and Advanced Development (RAD), Advanced Micro Devices, Inc, Austin, Texas, USA"],"affiliations":[{"raw_affiliation_string":"Computer Sciences Department, University of Wisconsin-Madison, Madison, Wisconsin, United States","institution_ids":["https://openalex.org/I135310074"]},{"raw_affiliation_string":"Research and Advanced Development (RAD), Advanced Micro Devices, Inc, Austin, Texas, USA","institution_ids":["https://openalex.org/I4210137977"]},{"raw_affiliation_string":"Computer Sciences Department, University of Wisconsin-Madison, Madison, Wisconsin, United States Research and Advanced Development (RAD), Advanced Micro Devices, Inc, Austin, Texas, USA","institution_ids":["https://openalex.org/I4210137977","https://openalex.org/I135310074"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5015196366"],"corresponding_institution_ids":["https://openalex.org/I135310074","https://openalex.org/I4210137977"],"apc_list":null,"apc_paid":null,"fwci":4.625,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.95803672,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1146","last_page":"1164"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9919999837875366,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8609780073165894},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.7048395276069641},{"id":"https://openalex.org/keywords/serialization","display_name":"Serialization","score":0.6344181299209595},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.5705780982971191},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.563534677028656},{"id":"https://openalex.org/keywords/interleaving","display_name":"Interleaving","score":0.5479065775871277},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.5422088503837585},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5029117465019226},{"id":"https://openalex.org/keywords/models-of-communication","display_name":"Models of communication","score":0.501983642578125},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.4853416979312897},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.42995262145996094},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.20941126346588135},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.1829007863998413},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.09454721212387085}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8609780073165894},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.7048395276069641},{"id":"https://openalex.org/C52723943","wikidata":"https://www.wikidata.org/wiki/Q1127410","display_name":"Serialization","level":2,"score":0.6344181299209595},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.5705780982971191},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.563534677028656},{"id":"https://openalex.org/C28034677","wikidata":"https://www.wikidata.org/wiki/Q17092530","display_name":"Interleaving","level":2,"score":0.5479065775871277},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.5422088503837585},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5029117465019226},{"id":"https://openalex.org/C158156997","wikidata":"https://www.wikidata.org/wiki/Q1416645","display_name":"Models of communication","level":2,"score":0.501983642578125},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.4853416979312897},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.42995262145996094},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.20941126346588135},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.1829007863998413},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.09454721212387085},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3620665.3640410","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3620665.3640410","pdf_url":null,"source":null,"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3620665.3640410","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3620665.3640410","pdf_url":null,"source":null,"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1191637369","display_name":null,"funder_award_id":"ENS-1925485","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3870589548","display_name":null,"funder_award_id":"133-AAK8232","funder_id":"https://openalex.org/F4320309434","funder_display_name":"University of Wisconsin-Madison"},{"id":"https://openalex.org/G8269996584","display_name":null,"funder_award_id":"AAK4964-2022-23","funder_id":"https://openalex.org/F4320309434","funder_display_name":"University of Wisconsin-Madison"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320309434","display_name":"University of Wisconsin-Madison","ror":"https://ror.org/01y2jtd41"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W1979717209","https://openalex.org/W2001611463","https://openalex.org/W2097117768","https://openalex.org/W2153190325","https://openalex.org/W2183341477","https://openalex.org/W2412101011","https://openalex.org/W2474451066","https://openalex.org/W2612654866","https://openalex.org/W2612695082","https://openalex.org/W2624298171","https://openalex.org/W2759071281","https://openalex.org/W2787181861","https://openalex.org/W2791175987","https://openalex.org/W2901073342","https://openalex.org/W2914304175","https://openalex.org/W2951091066","https://openalex.org/W2963341956","https://openalex.org/W3043571714","https://openalex.org/W3072623287","https://openalex.org/W3100877701","https://openalex.org/W3102510044","https://openalex.org/W3119866685","https://openalex.org/W3129488589","https://openalex.org/W3188065709","https://openalex.org/W3189166979","https://openalex.org/W3190062760","https://openalex.org/W3193985311","https://openalex.org/W3199518308","https://openalex.org/W3205803342","https://openalex.org/W3206003350","https://openalex.org/W4231595696","https://openalex.org/W4244454515","https://openalex.org/W4281758439","https://openalex.org/W4287391717","https://openalex.org/W4297812065","https://openalex.org/W4310895557","https://openalex.org/W4311457721","https://openalex.org/W4312060029","https://openalex.org/W4318256787","https://openalex.org/W4321636575","https://openalex.org/W4381894573","https://openalex.org/W4387951242","https://openalex.org/W6838557027"],"related_works":["https://openalex.org/W1655266410","https://openalex.org/W4231356583","https://openalex.org/W1593760324","https://openalex.org/W2595172197","https://openalex.org/W2389051085","https://openalex.org/W2330343234","https://openalex.org/W1901012776","https://openalex.org/W2084856301","https://openalex.org/W2127970246","https://openalex.org/W4391272625"],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"increasingly":[3],"rely":[4],"on":[5],"distributed":[6,32],"techniques":[7,14,33,44],"for":[8],"their":[9],"training":[10],"and":[11,36,85,101,109],"inference.":[12],"These":[13],"require":[15],"communication":[16,40,52,62,84],"across":[17],"devices":[18,28],"which":[19],"can":[20,34,89],"reduce":[21],"scaling":[22],"efficiency":[23],"as":[24,46,93],"the":[25,68,72],"number":[26],"of":[27,83],"increases.":[29],"While":[30],"some":[31],"overlap,":[35],"thus,":[37],"hide":[38,59],"this":[39,60,80],"with":[41,53,67,94],"independent":[42],"computations,":[43],"such":[45],"Tensor":[47],"Parallelism":[48],"(TP)":[49],"inherently":[50],"serialize":[51],"model":[54],"execution.":[55],"One":[56],"approach":[57],"to":[58,64,104],"serialized":[61],"is":[63],"interleave":[65],"it":[66,98],"producer":[69],"operation":[70],"(of":[71],"communicated":[73],"data)":[74],"in":[75,87],"a":[76],"fine-grained":[77,81],"manner.":[78],"However,":[79],"interleaving":[82],"computation":[86,108],"software":[88],"be":[90,105],"difficult.":[91],"Furthermore,":[92],"any":[95],"concurrent":[96],"execution,":[97],"requires":[99],"compute":[100],"memory":[102],"resources":[103],"shared":[106],"between":[107],"communication,":[110],"causing":[111],"resource":[112],"contention":[113],"that":[114],"reduces":[115],"overlapping":[116],"efficacy.":[117]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":2}],"updated_date":"2026-03-06T13:50:29.536080","created_date":"2025-10-10T00:00:00"}
