{"id":"https://openalex.org/W7134903178","doi":"https://doi.org/10.1109/asp-dac66049.2026.11420796","title":"PipeViT: Accelerating Vision Transformers via Intra-Layer Pipelining","display_name":"PipeViT: Accelerating Vision Transformers via Intra-Layer Pipelining","publication_year":2026,"publication_date":"2026-01-19","ids":{"openalex":"https://openalex.org/W7134903178","doi":"https://doi.org/10.1109/asp-dac66049.2026.11420796"},"language":null,"primary_location":{"id":"doi:10.1109/asp-dac66049.2026.11420796","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asp-dac66049.2026.11420796","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 31st Asia and South Pacific Design Automation Conference (ASP-DAC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5075911158","display_name":"Xilang Zhou","orcid":null},"institutions":[{"id":"https://openalex.org/I4210132426","display_name":"Shanghai Fudan Microelectronics (China)","ror":"https://ror.org/02vfj3j86","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210132426"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xilang Zhou","raw_affiliation_strings":["Fudan University,State Key Lab of Integrated Chips &#x0026; Systems, and School of Microelectronics,Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Fudan University,State Key Lab of Integrated Chips &#x0026; Systems, and School of Microelectronics,Shanghai,China","institution_ids":["https://openalex.org/I4210132426"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103730330","display_name":"Y. E. Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210132426","display_name":"Shanghai Fudan Microelectronics (China)","ror":"https://ror.org/02vfj3j86","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210132426"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yiheng Xu","raw_affiliation_strings":["Fudan University,State Key Lab of Integrated Chips &#x0026; Systems, and School of Microelectronics,Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Fudan University,State Key Lab of Integrated Chips &#x0026; Systems, and School of Microelectronics,Shanghai,China","institution_ids":["https://openalex.org/I4210132426"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047412399","display_name":"Haodong Lu","orcid":"https://orcid.org/0000-0002-8628-2664"},"institutions":[{"id":"https://openalex.org/I4210132426","display_name":"Shanghai Fudan Microelectronics (China)","ror":"https://ror.org/02vfj3j86","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210132426"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haodong Lu","raw_affiliation_strings":["Fudan University,State Key Lab of Integrated Chips &#x0026; Systems, and School of Microelectronics,Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Fudan University,State Key Lab of Integrated Chips &#x0026; Systems, and School of Microelectronics,Shanghai,China","institution_ids":["https://openalex.org/I4210132426"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128727690","display_name":"Jun Yu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210132426","display_name":"Shanghai Fudan Microelectronics (China)","ror":"https://ror.org/02vfj3j86","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210132426"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Yu","raw_affiliation_strings":["Fudan University,State Key Lab of Integrated Chips &#x0026; Systems, and School of Microelectronics,Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Fudan University,State Key Lab of Integrated Chips &#x0026; Systems, and School of Microelectronics,Shanghai,China","institution_ids":["https://openalex.org/I4210132426"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5111559743","display_name":"Kang L. Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210132426","display_name":"Shanghai Fudan Microelectronics (China)","ror":"https://ror.org/02vfj3j86","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210132426"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kun Wang","raw_affiliation_strings":["Fudan University,State Key Lab of Integrated Chips &#x0026; Systems, and School of Microelectronics,Shanghai,China"],"affiliations":[{"raw_affiliation_string":"Fudan University,State Key Lab of Integrated Chips &#x0026; Systems, and School of Microelectronics,Shanghai,China","institution_ids":["https://openalex.org/I4210132426"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5075911158"],"corresponding_institution_ids":["https://openalex.org/I4210132426"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.89544524,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"140","last_page":"146"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11992","display_name":"CCD and CMOS Imaging Sensors","score":0.5320000052452087,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11992","display_name":"CCD and CMOS Imaging Sensors","score":0.5320000052452087,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.12280000001192093,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12153","display_name":"Advanced Optical Sensing Technologies","score":0.019200000911951065,"subfield":{"id":"https://openalex.org/subfields/3105","display_name":"Instrumentation"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.4797999858856201},{"id":"https://openalex.org/keywords/signal-processing","display_name":"Signal processing","score":0.23109999299049377}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5879999995231628},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.4797999858856201},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.30320000648498535},{"id":"https://openalex.org/C24326235","wikidata":"https://www.wikidata.org/wiki/Q126095","display_name":"Electronic engineering","level":1,"score":0.29019999504089355},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.29019999504089355},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.28299999237060547},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.2768000066280365},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.26179999113082886},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.24570000171661377},{"id":"https://openalex.org/C104267543","wikidata":"https://www.wikidata.org/wiki/Q208163","display_name":"Signal processing","level":3,"score":0.23109999299049377}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asp-dac66049.2026.11420796","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asp-dac66049.2026.11420796","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 31st Asia and South Pacific Design Automation Conference (ASP-DAC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy","score":0.8267737030982971}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W4214588794","https://openalex.org/W4318541578","https://openalex.org/W4321637298","https://openalex.org/W4360831786","https://openalex.org/W4360831795","https://openalex.org/W4360832001","https://openalex.org/W4385187240","https://openalex.org/W4386076539","https://openalex.org/W4389162698","https://openalex.org/W4389524555","https://openalex.org/W4390874070","https://openalex.org/W4393973515","https://openalex.org/W4399208718","https://openalex.org/W4415797769","https://openalex.org/W7133227460"],"related_works":[],"abstract_inverted_index":{"Vision":[0],"Transformers":[1],"(ViTs)":[2],"have":[3],"achieved":[4],"high":[5],"performance":[6,26],"across":[7],"various":[8],"computer":[9],"vision":[10],"tasks":[11],"by":[12],"leveraging":[13],"the":[14,18,45,82,97,106,113,123,141,154,157],"attention":[15,19,83,100,115],"mechanism.":[16],"However,":[17],"module":[20],"in":[21,178],"ViTs":[22,36,65],"severely":[23],"hindered":[24],"inference":[25],"due":[27],"to":[28,73,89,104,117,137,167,184,197,207],"its":[29],"low":[30],"operational":[31],"intensity.":[32],"Existing":[33],"approaches":[34],"improve":[35,118],"efficiency":[37,205],"through":[38],"pruning,":[39],"sparsity,":[40],"and":[41,50,61,174,191,200,209],"linearization,":[42],"but":[43],"at":[44],"cost":[46],"of":[47,70,99,156],"fine-tuning":[48],"overhead":[49],"accuracy":[51],"degradation.":[52],"In":[53],"this":[54],"paper,":[55],"we":[56,79,95,111,126],"propose":[57],"PipeViT,":[58],"a":[59,86,128,147],"memory-efficient":[60],"low-latency":[62],"accelerator":[63],"for":[64,132],"inference.":[66],"The":[67],"key":[68],"insight":[69],"PipeViT":[71,164,194],"is":[72],"exploit":[74],"intra-layer":[75],"acceleration":[76],"opportunities.":[77],"Specifically,":[78],"first":[80],"fuse":[81],"operations":[84],"into":[85,101,146],"single":[87,148],"operator":[88],"reduce":[90,105],"memory":[91,108],"access":[92],"overhead.":[93],"Then,":[94],"divide":[96],"input":[98],"multiple":[102],"tiles":[103],"on-chip":[107],"requirement.":[109],"Finally,":[110],"pipeline":[112,134],"tiled":[114],"computation":[116],"overall":[119],"throughput.":[120],"Based":[121],"on":[122],"optimized":[124],"dataflow,":[125],"design":[127],"heterogeneous":[129],"dual-core":[130],"architecture":[131,142],"efficient":[133],"execution.":[135],"Furthermore,":[136],"maximize":[138],"hardware":[139],"utilization,":[140],"can":[143],"be":[144],"reconfigured":[145],"core":[149],"with":[150],"higher":[151,203],"parallelism":[152],"during":[153],"execution":[155],"feed-forward":[158],"network.":[159],"Experimental":[160],"results":[161],"show":[162],"that":[163],"achieves":[165,195],"up":[166,196],"$19.3":[168],"\\times":[169],"1.5":[170],"\\times,":[171],"2.1":[172],"\\times$,":[173],"$2.0":[175],"\\times$":[176,199,202],"improvements":[177],"Frames":[179],"Per":[180],"Second":[181],"(FPS)":[182],"compared":[183,206],"state-of-the-art":[185],"accelerators,":[186],"including":[187],"ViTA,":[188],"Auto-ViT,":[189],"MEViT,":[190],"HeatViT.":[192],"Additionally,":[193],"$8.0":[198],"$2.6":[201],"energy":[204],"CPU":[208],"GPU":[210],"implementations,":[211],"respectively.":[212]},"counts_by_year":[],"updated_date":"2026-03-13T14:20:09.374765","created_date":"2026-03-12T00:00:00"}
