{"id":"https://openalex.org/W4414688318","doi":"https://doi.org/10.1145/3766882.3767187","title":"Piper: Towards Flexible Pipeline Parallelism for PyTorch","display_name":"Piper: Towards Flexible Pipeline Parallelism for PyTorch","publication_year":2025,"publication_date":"2025-10-01","ids":{"openalex":"https://openalex.org/W4414688318","doi":"https://doi.org/10.1145/3766882.3767187"},"language":"en","primary_location":{"id":"doi:10.1145/3766882.3767187","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3766882.3767187","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 4th Workshop on Practical Adoption Challenges of ML for Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3766882.3767187","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041547187","display_name":"Megan Frisella","orcid":"https://orcid.org/0000-0002-2245-2687"},"institutions":[{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Megan Frisella","raw_affiliation_strings":["University of Washington"],"affiliations":[{"raw_affiliation_string":"University of Washington","institution_ids":["https://openalex.org/I201448701"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119790801","display_name":"Arvin Oentoro","orcid":null},"institutions":[{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Arvin Oentoro","raw_affiliation_strings":["University of Washington"],"affiliations":[{"raw_affiliation_string":"University of Washington","institution_ids":["https://openalex.org/I201448701"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101486243","display_name":"Xiangyu Gao","orcid":"https://orcid.org/0000-0002-1038-6539"},"institutions":[{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiangyu Gao","raw_affiliation_strings":["University of Washington"],"affiliations":[{"raw_affiliation_string":"University of Washington","institution_ids":["https://openalex.org/I201448701"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034400503","display_name":"Gilbert Bernstein","orcid":"https://orcid.org/0000-0002-3016-1169"},"institutions":[{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gilbert Bernstein","raw_affiliation_strings":["University of Washington"],"affiliations":[{"raw_affiliation_string":"University of Washington","institution_ids":["https://openalex.org/I201448701"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103023827","display_name":"Stephanie Wang","orcid":"https://orcid.org/0009-0006-8684-2357"},"institutions":[{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Stephanie Wang","raw_affiliation_strings":["University of Washington"],"affiliations":[{"raw_affiliation_string":"University of Washington","institution_ids":["https://openalex.org/I201448701"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5041547187"],"corresponding_institution_ids":["https://openalex.org/I201448701"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.33469932,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12326","display_name":"Network Packet Processing and Optimization","score":0.9919000267982483,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9894999861717224,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.7161999940872192},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.6481000185012817},{"id":"https://openalex.org/keywords/generality","display_name":"Generality","score":0.5859000086784363},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.5332000255584717},{"id":"https://openalex.org/keywords/execution-model","display_name":"Execution model","score":0.5242000222206116},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.4316999912261963},{"id":"https://openalex.org/keywords/data-parallelism","display_name":"Data parallelism","score":0.43059998750686646},{"id":"https://openalex.org/keywords/runtime-system","display_name":"Runtime system","score":0.4259999990463257}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8804000020027161},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.7161999940872192},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.6481000185012817},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5902000069618225},{"id":"https://openalex.org/C2780767217","wikidata":"https://www.wikidata.org/wiki/Q5532421","display_name":"Generality","level":2,"score":0.5859000086784363},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.5332000255584717},{"id":"https://openalex.org/C2776834041","wikidata":"https://www.wikidata.org/wiki/Q25346349","display_name":"Execution model","level":2,"score":0.5242000222206116},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.4505000114440918},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.4316999912261963},{"id":"https://openalex.org/C61483411","wikidata":"https://www.wikidata.org/wiki/Q3124522","display_name":"Data parallelism","level":3,"score":0.43059998750686646},{"id":"https://openalex.org/C2780870223","wikidata":"https://www.wikidata.org/wiki/Q1004415","display_name":"Runtime system","level":2,"score":0.4259999990463257},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3862999975681305},{"id":"https://openalex.org/C200833197","wikidata":"https://www.wikidata.org/wiki/Q333707","display_name":"Compile time","level":3,"score":0.3481000065803528},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.33799999952316284},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3353999853134155},{"id":"https://openalex.org/C8767382","wikidata":"https://www.wikidata.org/wiki/Q1058454","display_name":"Dynamic compilation","level":3,"score":0.3271999955177307},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.3260999917984009},{"id":"https://openalex.org/C2989134064","wikidata":"https://www.wikidata.org/wiki/Q288510","display_name":"Execution time","level":2,"score":0.3248000144958496},{"id":"https://openalex.org/C106515295","wikidata":"https://www.wikidata.org/wiki/Q26806595","display_name":"Parallel processing","level":2,"score":0.29179999232292175},{"id":"https://openalex.org/C42992933","wikidata":"https://www.wikidata.org/wiki/Q691169","display_name":"Task parallelism","level":3,"score":0.2782999873161316},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.2703999876976013},{"id":"https://openalex.org/C140763907","wikidata":"https://www.wikidata.org/wiki/Q2714055","display_name":"Instruction-level parallelism","level":3,"score":0.2623000144958496},{"id":"https://openalex.org/C34165917","wikidata":"https://www.wikidata.org/wiki/Q188267","display_name":"Programming paradigm","level":2,"score":0.2581999897956848},{"id":"https://openalex.org/C175309249","wikidata":"https://www.wikidata.org/wiki/Q725864","display_name":"Pipeline transport","level":2,"score":0.257099986076355},{"id":"https://openalex.org/C31395832","wikidata":"https://www.wikidata.org/wiki/Q1318674","display_name":"Testbed","level":2,"score":0.2540000081062317}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3766882.3767187","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3766882.3767187","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 4th Workshop on Practical Adoption Challenges of ML for Systems","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3766882.3767187","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3766882.3767187","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 4th Workshop on Practical Adoption Challenges of ML for Systems","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":7,"referenced_works":["https://openalex.org/W2913954081","https://openalex.org/W2969388332","https://openalex.org/W3086105743","https://openalex.org/W3204998121","https://openalex.org/W4391125288","https://openalex.org/W4394998532","https://openalex.org/W4405903187"],"related_works":[],"abstract_inverted_index":{"With":[0],"the":[1,37],"rise":[2],"of":[3,88],"increasingly":[4],"large-scale":[5],"ML":[6,21],"systems,":[7],"distributed":[8,26],"execution":[9,27,40,54,61,93],"across":[10],"multiple":[11],"devices":[12],"is":[13],"critical":[14],"for":[15,55],"efficient":[16],"training":[17],"and":[18,39,46,59,78,91],"inference.":[19],"Current":[20],"systems":[22],"have":[23],"adopted":[24],"pipeline-parallel":[25,53,92],"strategies":[28],"to":[29,83],"improve":[30],"resource":[31],"efficiency,":[32],"but":[33],"lack":[34],"generality":[35],"in":[36],"models":[38,58,90],"schedules":[41],"they":[42],"support.":[43],"We":[44,63],"designed":[45],"prototyped":[47],"Piper,":[48],"a":[49,85],"compiler":[50],"that":[51,68],"automates":[52],"arbitrary":[56],"PyTorch":[57,89],"custom":[60],"schedules.":[62,94],"present":[64],"preliminary":[65],"performance":[66],"results":[67],"compare":[69],"with":[70],"existing":[71],"state-of-the-art":[72],"pipeline":[73],"parallelism":[74],"frameworks":[75],"pipelining":[76],"Llama":[77],"CLIP":[79],"models,":[80],"while":[81],"aiming":[82],"support":[84],"wider":[86],"range":[87]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}
