{"id":"https://openalex.org/W4416189531","doi":"https://doi.org/10.1145/3779212.3790229","title":"Streaming Tensor Programs: A Streaming Abstraction for Dynamic Parallelism","display_name":"Streaming Tensor Programs: A Streaming Abstraction for Dynamic Parallelism","publication_year":2026,"publication_date":"2026-03-10","ids":{"openalex":"https://openalex.org/W4416189531","doi":"https://doi.org/10.1145/3779212.3790229"},"language":null,"primary_location":{"id":"doi:10.1145/3779212.3790229","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3779212.3790229","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3779212.3790229","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5085031335","display_name":"Gina Sohn","orcid":null},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Gina Sohn","raw_affiliation_strings":["Stanford University, Stanford, CA, USA"],"raw_orcid":"https://orcid.org/0009-0006-1899-1043","affiliations":[{"raw_affiliation_string":"Stanford University, Stanford, CA, USA","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049888384","display_name":"Genghan Zhang","orcid":"https://orcid.org/0000-0002-3866-8167"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Genghan Zhang","raw_affiliation_strings":["Stanford University, Stanford, CA, USA"],"raw_orcid":"https://orcid.org/0000-0002-3866-8167","affiliations":[{"raw_affiliation_string":"Stanford University, Stanford, CA, USA","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5098936747","display_name":"Konstantin J. Ho\u00dffeld","orcid":"https://orcid.org/0009-0005-9542-3317"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Konstantin Hossfeld","raw_affiliation_strings":["Stanford University, Stanford, CA, USA"],"raw_orcid":"https://orcid.org/0009-0005-9542-3317","affiliations":[{"raw_affiliation_string":"Stanford University, Stanford, CA, USA","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jungwoo Kim","orcid":"https://orcid.org/0000-0003-4773-9251"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jungwoo Kim","raw_affiliation_strings":["Stanford University, Stanford, CA, USA"],"raw_orcid":"https://orcid.org/0000-0003-4773-9251","affiliations":[{"raw_affiliation_string":"Stanford University, Stanford, CA, USA","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120648600","display_name":"Nathan Sobotka","orcid":null},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nathan Sobotka","raw_affiliation_strings":["Stanford University, Stanford, CA, USA"],"raw_orcid":"https://orcid.org/0009-0009-7994-4875","affiliations":[{"raw_affiliation_string":"Stanford University, Stanford, CA, USA","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Nathan Zhang","orcid":"https://orcid.org/0000-0002-9668-902X"},"institutions":[{"id":"https://openalex.org/I4210099960","display_name":"Systems Control (United States)","ror":"https://ror.org/014tted12","country_code":"US","type":"company","lineage":["https://openalex.org/I4210099960"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nathan Zhang","raw_affiliation_strings":["SambaNova Systems, Inc, Palo Alto, CA, USA"],"raw_orcid":"https://orcid.org/0000-0002-9668-902X","affiliations":[{"raw_affiliation_string":"SambaNova Systems, Inc, Palo Alto, CA, USA","institution_ids":["https://openalex.org/I4210099960"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039681071","display_name":"Olivia Hsu","orcid":"https://orcid.org/0000-0002-4195-8106"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Olivia Hsu","raw_affiliation_strings":["Stanford University, Stanford, CA, USA and Carnegie Mellon University, Pittsburgh, PA, USA"],"raw_orcid":"https://orcid.org/0000-0002-4195-8106","affiliations":[{"raw_affiliation_string":"Stanford University, Stanford, CA, USA and Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023857198","display_name":"Kunle Olukotun","orcid":"https://orcid.org/0000-0002-8779-0636"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kunle Olukotun","raw_affiliation_strings":["Stanford University, Stanford, CA, USA"],"raw_orcid":"https://orcid.org/0000-0002-8779-0636","affiliations":[{"raw_affiliation_string":"Stanford University, Stanford, CA, USA","institution_ids":["https://openalex.org/I97018004"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5085031335"],"corresponding_institution_ids":["https://openalex.org/I97018004"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.00621977,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1912","last_page":"1932"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.5328999757766724,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.5328999757766724,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.13079999387264252,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10126","display_name":"Logic, programming, and type systems","score":0.08590000122785568,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/dataflow","display_name":"Dataflow","score":0.8840000033378601},{"id":"https://openalex.org/keywords/abstraction","display_name":"Abstraction","score":0.692799985408783},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.5920000076293945},{"id":"https://openalex.org/keywords/tensor","display_name":"Tensor (intrinsic definition)","score":0.54339998960495},{"id":"https://openalex.org/keywords/dynamic-programming","display_name":"Dynamic programming","score":0.42800000309944153},{"id":"https://openalex.org/keywords/dynamic-data","display_name":"Dynamic data","score":0.412200003862381},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.41200000047683716},{"id":"https://openalex.org/keywords/subroutine","display_name":"Subroutine","score":0.3747999966144562}],"concepts":[{"id":"https://openalex.org/C96324660","wikidata":"https://www.wikidata.org/wiki/Q205446","display_name":"Dataflow","level":2,"score":0.8840000033378601},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8555999994277954},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.692799985408783},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6347000002861023},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.5920000076293945},{"id":"https://openalex.org/C155281189","wikidata":"https://www.wikidata.org/wiki/Q3518150","display_name":"Tensor (intrinsic definition)","level":2,"score":0.54339998960495},{"id":"https://openalex.org/C37404715","wikidata":"https://www.wikidata.org/wiki/Q380679","display_name":"Dynamic programming","level":2,"score":0.42800000309944153},{"id":"https://openalex.org/C197298091","wikidata":"https://www.wikidata.org/wiki/Q5318963","display_name":"Dynamic data","level":2,"score":0.412200003862381},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.41200000047683716},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.4077000021934509},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.40540000796318054},{"id":"https://openalex.org/C96147967","wikidata":"https://www.wikidata.org/wiki/Q190686","display_name":"Subroutine","level":2,"score":0.3747999966144562},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.35429999232292175},{"id":"https://openalex.org/C107027933","wikidata":"https://www.wikidata.org/wiki/Q2006448","display_name":"Stream processing","level":2,"score":0.3222000002861023},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.29490000009536743},{"id":"https://openalex.org/C29013271","wikidata":"https://www.wikidata.org/wiki/Q7544","display_name":"Wormhole","level":2,"score":0.2944999933242798},{"id":"https://openalex.org/C160191386","wikidata":"https://www.wikidata.org/wiki/Q868299","display_name":"Control flow","level":2,"score":0.2944999933242798},{"id":"https://openalex.org/C34165917","wikidata":"https://www.wikidata.org/wiki/Q188267","display_name":"Programming paradigm","level":2,"score":0.29280000925064087},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.2921000123023987},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.29089999198913574},{"id":"https://openalex.org/C118702147","wikidata":"https://www.wikidata.org/wiki/Q189396","display_name":"Dynamic random-access memory","level":3,"score":0.2865000069141388},{"id":"https://openalex.org/C147358964","wikidata":"https://www.wikidata.org/wiki/Q1200992","display_name":"Abstraction layer","level":3,"score":0.2856000065803528},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.27390000224113464},{"id":"https://openalex.org/C121375916","wikidata":"https://www.wikidata.org/wiki/Q936559","display_name":"Principle of compositionality","level":2,"score":0.26330000162124634}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1145/3779212.3790229","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3779212.3790229","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2511.07776","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2511.07776","pdf_url":"https://arxiv.org/pdf/2511.07776","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:doi:10.48550/arxiv.2511.07776","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2511.07776","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2511.07776","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.1145/3779212.3790229","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3779212.3790229","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Dynamic":[0],"behaviors":[1,39,108],"are":[2],"becoming":[3],"prevalent":[4],"in":[5],"tensor":[6,18,62,87],"applications,":[7],"like":[8,94],"machine":[9],"learning,":[10],"where":[11],"many":[12],"widely":[13],"used":[14],"models":[15],"contain":[16],"data-dependent":[17],"shapes":[19],"and":[20,78,86,99,121,147,160],"control":[21],"flow.":[22],"However,":[23],"the":[24,134],"limited":[25],"expressiveness":[26],"of":[27],"prior":[28,138,157],"programming":[29],"abstractions":[30,159],"for":[31],"spatial":[32],"dataflow":[33,111],"accelerators":[34],"(SDAs)":[35],"forces":[36],"these":[37,48],"dynamic":[38,61,83,95,97,107,130,140],"to":[40,64,106],"be":[41],"implemented":[42],"statically":[43],"and/or":[44],"unoptimized.":[45],"To":[46],"address":[47],"challenges,":[49],"we":[50],"present":[51],"Streaming":[52],"Tensor":[53],"Programs":[54],"(STeP),":[55],"a":[56,114,122],"streaming":[57],"abstraction":[58],"that":[59,81,102,132,142,150],"enables":[60],"workloads":[63],"run":[65],"efficiently":[66],"on":[67,117],"SDAs.":[68],"STeP":[69,128],"introduces":[70],"flexible":[71],"routing":[72],"operators,":[73],"an":[74],"explicit":[75],"memory":[76],"hierarchy,":[77],"symbolic-shape":[79],"semantics":[80],"expose":[82],"data":[84],"rates":[85],"dimensions.":[88],"These":[89],"capabilities":[90],"unlock":[91],"new":[92],"optimizations,":[93],"tiling,":[96],"parallelization,":[98],"configuration":[100,148],"time-multiplexing,":[101],"adapt":[103],"SDA":[104,158],"execution":[105],"while":[109],"preserving":[110],"efficiency.":[112],"Using":[113],"cycle-approximate":[115],"simulator":[116],"representative":[118],"LLM":[119],"layers":[120],"full":[123],"model":[124],"with":[125],"real-world":[126],"traces,":[127],"enables:":[129],"tiling":[131],"breaks":[133],"Pareto-optimal":[135],"frontier":[136],"from":[137],"work,":[139],"parallelization":[141],"improves":[143],"latency":[144],"by":[145,154],"~2.72x,":[146],"time-multiplexing":[149],"increases":[151],"compute":[152],"utilization":[153],"~2.64x":[155],"over":[156],"their":[161],"implementations.":[162]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2025-11-13T00:00:00"}
