{"id":"https://openalex.org/W4410204569","doi":"https://doi.org/10.1109/tcad.2025.3568348","title":"Para-Pipe: Exploiting Hierarchical Operator Parallelism of ML Computational Graphs on SoCs","display_name":"Para-Pipe: Exploiting Hierarchical Operator Parallelism of ML Computational Graphs on SoCs","publication_year":2025,"publication_date":"2025-05-08","ids":{"openalex":"https://openalex.org/W4410204569","doi":"https://doi.org/10.1109/tcad.2025.3568348"},"language":"en","primary_location":{"id":"doi:10.1109/tcad.2025.3568348","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcad.2025.3568348","pdf_url":null,"source":{"id":"https://openalex.org/S100835903","display_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","issn_l":"0278-0070","issn":["0278-0070","1937-4151"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Yujie Zhang","orcid":"https://orcid.org/0009-0009-8982-4084"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Yujie Zhang","raw_affiliation_strings":["School of Computing, National University of Singapore, Queenstown, Singapore"],"raw_orcid":"https://orcid.org/0009-0009-8982-4084","affiliations":[{"raw_affiliation_string":"School of Computing, National University of Singapore, Queenstown, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081598354","display_name":"Huiying Lan","orcid":"https://orcid.org/0000-0003-3120-5773"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Huiying Lan","raw_affiliation_strings":["School of Computing, National University of Singapore, Queenstown, Singapore"],"raw_orcid":"https://orcid.org/0000-0003-3120-5773","affiliations":[{"raw_affiliation_string":"School of Computing, National University of Singapore, Queenstown, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108999618","display_name":"Ehsan Aghapour","orcid":"https://orcid.org/0000-0002-0291-7555"},"institutions":[{"id":"https://openalex.org/I55106644","display_name":"Amsterdam University of Applied Sciences","ror":"https://ror.org/00y2z2s03","country_code":"NL","type":"education","lineage":["https://openalex.org/I55106644"]},{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Ehsan Aghapour","raw_affiliation_strings":["Department of Parallel Computer Systems, University of Amsterdam, Amsterdam, The Netherlands"],"raw_orcid":"https://orcid.org/0000-0002-0291-7555","affiliations":[{"raw_affiliation_string":"Department of Parallel Computer Systems, University of Amsterdam, Amsterdam, The Netherlands","institution_ids":["https://openalex.org/I887064364","https://openalex.org/I55106644"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017565499","display_name":"Zhiyuan Ning","orcid":"https://orcid.org/0009-0000-4718-1564"},"institutions":[{"id":"https://openalex.org/I883665419","display_name":"Blackburn College","ror":"https://ror.org/02a1et264","country_code":"US","type":"education","lineage":["https://openalex.org/I883665419"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhiyuan Ning","raw_affiliation_strings":["Information Technology Department, Black Sesame Technologies, San Jose, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Information Technology Department, Black Sesame Technologies, San Jose, CA, USA","institution_ids":["https://openalex.org/I883665419"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077847021","display_name":"Peng Zan","orcid":"https://orcid.org/0000-0003-2588-2188"},"institutions":[{"id":"https://openalex.org/I883665419","display_name":"Blackburn College","ror":"https://ror.org/02a1et264","country_code":"US","type":"education","lineage":["https://openalex.org/I883665419"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Peng Zan","raw_affiliation_strings":["Information Technology Department, Black Sesame Technologies, San Jose, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Information Technology Department, Black Sesame Technologies, San Jose, CA, USA","institution_ids":["https://openalex.org/I883665419"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079708995","display_name":"Weidong Shao","orcid":"https://orcid.org/0000-0003-1093-9006"},"institutions":[{"id":"https://openalex.org/I883665419","display_name":"Blackburn College","ror":"https://ror.org/02a1et264","country_code":"US","type":"education","lineage":["https://openalex.org/I883665419"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Weidong Shao","raw_affiliation_strings":["Information Technology Department, Black Sesame Technologies, San Jose, CA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Information Technology Department, Black Sesame Technologies, San Jose, CA, USA","institution_ids":["https://openalex.org/I883665419"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067055700","display_name":"Anuj Pathania","orcid":"https://orcid.org/0000-0002-5813-7021"},"institutions":[{"id":"https://openalex.org/I55106644","display_name":"Amsterdam University of Applied Sciences","ror":"https://ror.org/00y2z2s03","country_code":"NL","type":"education","lineage":["https://openalex.org/I55106644"]},{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Anuj Pathania","raw_affiliation_strings":["Department of Parallel Computer Systems, University of Amsterdam, Amsterdam, The Netherlands"],"raw_orcid":"https://orcid.org/0000-0002-5813-7021","affiliations":[{"raw_affiliation_string":"Department of Parallel Computer Systems, University of Amsterdam, Amsterdam, The Netherlands","institution_ids":["https://openalex.org/I887064364","https://openalex.org/I55106644"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049237676","display_name":"Tulika Mitra","orcid":"https://orcid.org/0000-0003-4136-4188"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Tulika Mitra","raw_affiliation_strings":["School of Computing, National University of Singapore, Queenstown, Singapore"],"raw_orcid":"https://orcid.org/0000-0003-4136-4188","affiliations":[{"raw_affiliation_string":"School of Computing, National University of Singapore, Queenstown, Singapore","institution_ids":["https://openalex.org/I165932596"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I165932596"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.07494311,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"44","issue":"12","first_page":"4472","last_page":"4485"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9293000102043152,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12292","display_name":"Graph Theory and Algorithms","score":0.9293000102043152,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9279000163078308,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11522","display_name":"VLSI and FPGA Design Techniques","score":0.9186999797821045,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.6931963562965393},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6777989864349365},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6593835353851318},{"id":"https://openalex.org/keywords/operator","display_name":"Operator (biology)","score":0.5619107484817505},{"id":"https://openalex.org/keywords/data-parallelism","display_name":"Data parallelism","score":0.4339945912361145},{"id":"https://openalex.org/keywords/task-parallelism","display_name":"Task parallelism","score":0.42709192633628845},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.35019588470458984},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.0921458899974823}],"concepts":[{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.6931963562965393},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6777989864349365},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6593835353851318},{"id":"https://openalex.org/C17020691","wikidata":"https://www.wikidata.org/wiki/Q139677","display_name":"Operator (biology)","level":5,"score":0.5619107484817505},{"id":"https://openalex.org/C61483411","wikidata":"https://www.wikidata.org/wiki/Q3124522","display_name":"Data parallelism","level":3,"score":0.4339945912361145},{"id":"https://openalex.org/C42992933","wikidata":"https://www.wikidata.org/wiki/Q691169","display_name":"Task parallelism","level":3,"score":0.42709192633628845},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.35019588470458984},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0921458899974823},{"id":"https://openalex.org/C158448853","wikidata":"https://www.wikidata.org/wiki/Q425218","display_name":"Repressor","level":4,"score":0.0},{"id":"https://openalex.org/C86339819","wikidata":"https://www.wikidata.org/wiki/Q407384","display_name":"Transcription factor","level":3,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcad.2025.3568348","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcad.2025.3568348","pdf_url":null,"source":{"id":"https://openalex.org/S100835903","display_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","issn_l":"0278-0070","issn":["0278-0070","1937-4151"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8039525913","display_name":null,"funder_award_id":"NRF-CRP23-2019-0003","funder_id":"https://openalex.org/F4320320709","funder_display_name":"National Research Foundation Singapore"},{"id":"https://openalex.org/G8140490772","display_name":null,"funder_award_id":"NRF-CRP232019-0003","funder_id":"https://openalex.org/F4320320709","funder_display_name":"National Research Foundation Singapore"}],"funders":[{"id":"https://openalex.org/F4320320709","display_name":"National Research Foundation Singapore","ror":"https://ror.org/03cpyc314"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1979022228","https://openalex.org/W2149294210","https://openalex.org/W2150879632","https://openalex.org/W2160104654","https://openalex.org/W2183341477","https://openalex.org/W2768106074","https://openalex.org/W2795444169","https://openalex.org/W2892077825","https://openalex.org/W2922395136","https://openalex.org/W2929084559","https://openalex.org/W2936286953","https://openalex.org/W3017948916","https://openalex.org/W3095320840","https://openalex.org/W3168243772","https://openalex.org/W3169512402","https://openalex.org/W3177372937","https://openalex.org/W4225793049","https://openalex.org/W4235470811","https://openalex.org/W4246193833","https://openalex.org/W4283020086","https://openalex.org/W4303645613","https://openalex.org/W4312235386","https://openalex.org/W4312894406","https://openalex.org/W4313496554","https://openalex.org/W4376852255","https://openalex.org/W4386764053","https://openalex.org/W4387435523","https://openalex.org/W4389505293","https://openalex.org/W4392248363","https://openalex.org/W4394569872","https://openalex.org/W4399534610"],"related_works":["https://openalex.org/W2950520577","https://openalex.org/W1501159154","https://openalex.org/W2003935582","https://openalex.org/W74409296","https://openalex.org/W1554644772","https://openalex.org/W2494130044","https://openalex.org/W2468095077","https://openalex.org/W29548032","https://openalex.org/W4230999561","https://openalex.org/W1595672120"],"abstract_inverted_index":{"As":[0],"edge-based":[1],"deep":[2,174],"learning":[3,175],"applications":[4],"become":[5],"more":[6],"complex,":[7],"optimizing":[8,81],"performance":[9,83],"on":[10,153,186],"heterogeneous":[11],"System-on-Chips":[12],"(SoCs)":[13],"presents":[14],"unique":[15],"challenges.":[16],"Traditional":[17],"pipelining":[18,72],"techniques":[19],"distributing":[20],"the":[21,35,87,109,167],"computation":[22],"across":[23,62,122],"different":[24],"on-chip":[25],"processing":[26,64],"units,":[27,65],"while":[28],"effective":[29],"for":[30],"throughput,":[31],"do":[32],"not":[33],"address":[34],"latency":[36,114,152],"demands":[37],"posed":[38],"by":[39,115],"modern":[40],"neural":[41],"networks":[42],"with":[43,158],"complex":[44],"interdependencies":[45],"and":[46,113,121,151,162,177,201],"extensive":[47],"operator":[48,56,101],"parallelism.":[49],"There":[50],"is":[51],"a":[52,78,93,104,147,173],"potential":[53],"in":[54],"leveraging":[55],"parallelism":[57,102,118],"to":[58,204],"enable":[59],"concurrent":[60],"execution":[61,75],"multiple":[63,143],"thereby":[66],"reducing":[67],"inference":[68],"latency.":[69],"However,":[70],"prioritizing":[71],"or":[73],"parallel":[74,206],"often":[76],"necessitates":[77],"compromise,":[79],"where":[80],"one":[82],"metric":[84],"adversely":[85],"impacts":[86],"other.":[88],"This":[89,125],"paper":[90],"introduces":[91],"Para-Pipe,":[92],"hierarchical":[94],"mapping":[95],"framework":[96],"that":[97,140],"integrates":[98],"intra-and":[99],"inter-stage":[100],"within":[103,120],"pipelined":[105,199],"architecture.":[106],"Para-Pipe":[107,141,185],"navigates":[108],"trade-off":[110],"between":[111,149],"throughput":[112,150],"selectively":[116],"fine-tuning":[117],"levels":[119],"pipeline":[123],"stages.":[124],"strategy":[126],"can":[127],"significantly":[128,133],"reduce":[129],"inter-processor":[130],"communication":[131],"overhead,":[132],"improving":[134],"energy":[135,192],"efficiency.":[136],"Our":[137],"evaluation":[138],"demonstrates":[139],"generates":[142],"Pareto-optimal":[144],"configurations,":[145],"achieving":[146],"balance":[148],"an":[154,190],"Amlogic":[155,187],"SoC":[156,171,188],"equipped":[157],"ARM":[159],"big.LITTLE":[160],"CPUs":[161],"GPU,":[163],"as":[164,166],"well":[165],"Black":[168],"Sesame":[169],"Technology":[170],"featuring":[172],"accelerator":[176],"two":[178],"DSPs.":[179],"More":[180],"importantly,":[181],"throughput-optimized":[182],"configurations":[183],"under":[184],"show":[189],"average":[191],"efficiency":[193],"improvement":[194],"of":[195],"11.0%":[196],"over":[197],"purely":[198],"strategies":[200],"23.3%":[202],"relative":[203],"non-pipelined":[205],"execution.":[207]},"counts_by_year":[],"updated_date":"2025-11-23T05:10:03.516525","created_date":"2025-10-10T00:00:00"}
