{"id":"https://openalex.org/W4414199116","doi":"https://doi.org/10.1109/dac63849.2025.11133383","title":"PARO: Hardware-Software Co-design with Pattern-aware Reorder-based Attention Quantization in Video Generation Models","display_name":"PARO: Hardware-Software Co-design with Pattern-aware Reorder-based Attention Quantization in Video Generation Models","publication_year":2025,"publication_date":"2025-06-22","ids":{"openalex":"https://openalex.org/W4414199116","doi":"https://doi.org/10.1109/dac63849.2025.11133383"},"language":"en","primary_location":{"id":"doi:10.1109/dac63849.2025.11133383","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac63849.2025.11133383","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 62nd ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5077505315","display_name":"Xinhao Yang","orcid":"https://orcid.org/0000-0002-0873-4514"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xinhao Yang","raw_affiliation_strings":["Tsinghua University,Dept. of EE, BNRist"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Dept. of EE, BNRist","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019400001","display_name":"Tianchen Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianchen Zhao","raw_affiliation_strings":["Tsinghua University,Dept. of EE, BNRist"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Dept. of EE, BNRist","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023007010","display_name":"Hongyi Wang","orcid":"https://orcid.org/0009-0008-7095-7963"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongyi Wang","raw_affiliation_strings":["Tsinghua University,Dept. of EE, BNRist"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Dept. of EE, BNRist","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074195618","display_name":"Wenheng Ma","orcid":"https://orcid.org/0000-0003-2349-7286"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenheng Ma","raw_affiliation_strings":["Tsinghua University,Dept. of EE, BNRist"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Dept. of EE, BNRist","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026831784","display_name":"Shulin Zeng","orcid":"https://orcid.org/0000-0002-1030-3748"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shulin Zeng","raw_affiliation_strings":["Tsinghua University,Dept. of EE, BNRist"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Dept. of EE, BNRist","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068626165","display_name":"Zhenhua Zhu","orcid":"https://orcid.org/0000-0002-4554-1770"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenhua Zhu","raw_affiliation_strings":["Tsinghua University,Dept. of EE, BNRist"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Dept. of EE, BNRist","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086217226","display_name":"Xuefei Ning","orcid":"https://orcid.org/0000-0003-2209-8312"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuefei Ning","raw_affiliation_strings":["Tsinghua University,Dept. of EE, BNRist"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Dept. of EE, BNRist","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023755254","display_name":"Huazhong Yang","orcid":"https://orcid.org/0000-0003-2421-353X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huazhong Yang","raw_affiliation_strings":["Tsinghua University,Dept. of EE, BNRist"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Dept. of EE, BNRist","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100761966","display_name":"Yu Wang","orcid":"https://orcid.org/0000-0003-3831-5940"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Wang","raw_affiliation_strings":["Tsinghua University,Dept. of EE, BNRist"],"affiliations":[{"raw_affiliation_string":"Tsinghua University,Dept. of EE, BNRist","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5077505315"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.23462419,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11992","display_name":"CCD and CMOS Imaging Sensors","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11992","display_name":"CCD and CMOS Imaging Sensors","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10741","display_name":"Video Coding and Compression Technologies","score":0.9742000102996826,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9679999947547913,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.7239000201225281},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6776999831199646},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.6406000256538391},{"id":"https://openalex.org/keywords/lossless-compression","display_name":"Lossless compression","score":0.555899977684021},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5371000170707703},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.5042999982833862},{"id":"https://openalex.org/keywords/video-processing","display_name":"Video processing","score":0.3610000014305115},{"id":"https://openalex.org/keywords/hardware-acceleration","display_name":"Hardware acceleration","score":0.34150001406669617}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8747000098228455},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.7239000201225281},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6776999831199646},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.6406000256538391},{"id":"https://openalex.org/C81081738","wikidata":"https://www.wikidata.org/wiki/Q55542","display_name":"Lossless compression","level":3,"score":0.555899977684021},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5371000170707703},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.5042999982833862},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.4226999878883362},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.3889000117778778},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.37619999051094055},{"id":"https://openalex.org/C65483669","wikidata":"https://www.wikidata.org/wiki/Q3536669","display_name":"Video processing","level":2,"score":0.3610000014305115},{"id":"https://openalex.org/C13164978","wikidata":"https://www.wikidata.org/wiki/Q600158","display_name":"Hardware acceleration","level":3,"score":0.34150001406669617},{"id":"https://openalex.org/C25797200","wikidata":"https://www.wikidata.org/wiki/Q828137","display_name":"Compression ratio","level":3,"score":0.328000009059906},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3253999948501587},{"id":"https://openalex.org/C134835016","wikidata":"https://www.wikidata.org/wiki/Q690265","display_name":"Lookup table","level":2,"score":0.3230000138282776},{"id":"https://openalex.org/C2776449333","wikidata":"https://www.wikidata.org/wiki/Q7928781","display_name":"View synthesis","level":3,"score":0.3122999966144562},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.310699999332428},{"id":"https://openalex.org/C13481523","wikidata":"https://www.wikidata.org/wiki/Q412438","display_name":"Image compression","level":4,"score":0.3066999912261963},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.29820001125335693},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2888000011444092},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.2856000065803528},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.2678000032901764},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.26080000400543213},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.25209999084472656},{"id":"https://openalex.org/C2778192920","wikidata":"https://www.wikidata.org/wiki/Q16874989","display_name":"Signal compression","level":4,"score":0.25110000371932983}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/dac63849.2025.11133383","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac63849.2025.11133383","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 62nd ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320314786","display_name":"Xilinx","ror":"https://ror.org/01rb7bk56"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W2725159389","https://openalex.org/W2963122961","https://openalex.org/W3017024317","https://openalex.org/W3159727696","https://openalex.org/W3189877953","https://openalex.org/W3206453033","https://openalex.org/W4214686755","https://openalex.org/W4285601701","https://openalex.org/W4308083526","https://openalex.org/W4313069943","https://openalex.org/W4360831786","https://openalex.org/W4360831795","https://openalex.org/W4390872297","https://openalex.org/W4390873361","https://openalex.org/W4390873673","https://openalex.org/W4390874113","https://openalex.org/W4390874580","https://openalex.org/W4394998519"],"related_works":[],"abstract_inverted_index":{"Transformer-based":[0],"video":[1,43,74,97],"generation":[2,75,98],"models":[3,76],"have":[4],"demonstrated":[5],"significant":[6],"potential":[7],"in":[8,64,80,202,217],"content":[9],"creation.":[10],"However,":[11],"the":[12,30,107,155,166,192,203,240],"current":[13],"state-of-the-art":[14,236],"model":[15,59],"employing":[16],"\u201c":[17],"3":[18],"D":[19],"full":[20,113],"attention\u201d":[21],"encounters":[22],"substantial":[23],"computation":[24],"and":[25,40,68,115,226,242],"storage":[26],"challenges.":[27],"For":[28],"instance,":[29],"attention":[31,103,109,114,167,205],"map":[32,168],"size":[33],"for":[34],"$\\operatorname{Cog}$":[35],"VideoX-5B":[36],"requires":[37],"56.50":[38],"GB,":[39],"generating":[41],"a":[42,96,117,126],"of":[44,111,146,151,157],"49":[45],"frames":[46],"takes":[47],"approximately":[48],"1":[49],"minute":[50],"on":[51,239],"an":[52,143,177,222],"NVIDIA":[53,223],"A100":[54,224],"GPU":[55,225],"under":[56,142],"FP16.":[57],"Although":[58],"quantization":[60,134],"has":[61],"proven":[62],"effective":[63],"reducing":[65],"both":[66],"memory":[67],"computational":[69],"costs,":[70],"applying":[71],"it":[72],"to":[73,121,138,153,170,198,213,221,229],"still":[77],"faces":[78],"challenges":[79],"preserving":[81],"algorithm":[82],"performance":[83,219],"while":[84],"ensuring":[85],"efficient":[86],"hardware":[87,200],"processing.":[88],"To":[89],"address":[90],"these":[91,123],"issues,":[92],"we":[93],"introduce":[94],"PARO,":[95],"accelerator":[99],"with":[100],"patternaware":[101],"reorder-based":[102],"quantization.":[104],"PARO":[105,175,210],"investigates":[106],"diverse":[108],"patterns":[110,124],"3D":[112],"proposes":[116],"novel":[118],"reorder":[119],"technique":[120],"unify":[122],"into":[125],"unified":[127],"\u201cblock":[128],"diagonal\u201d":[129],"structure.":[130],"Block-wise":[131],"mixed":[132],"precision":[133],"is":[135],"further":[136],"applied":[137],"achieve":[139],"lossless":[140],"compression":[141],"average":[144],"bitwidth":[145,169],"4.80":[147],"bits.":[148],"In":[149],"terms":[150],"hardware,":[152],"overcome":[154],"limitation":[156],"existing":[158],"mixed-precision":[159],"computing":[160],"units":[161],"could":[162],"not":[163],"fully":[164,196],"utilize":[165],"accelerate":[171],"$Q":[172],"K$":[173],"multiplication,":[174],"designs":[176],"output-bitwidth":[178],"aware":[179],"mixedprecision":[180,193],"processing":[181],"element":[182],"(PE)":[183],"array":[184],"through":[185],"hardwaresoftware":[186],"co-design.":[187],"This":[188],"approach":[189],"ensures":[190],"that":[191,209],"characteristics":[194],"are":[195],"utilized":[197],"enhance":[199],"efficiency":[201],"bottleneck":[204],"computation.":[206],"Experiments":[207],"demonstrate":[208],"delivers":[211],"up":[212,228],"$2.71":[214],"\\times$":[215,233],"improvement":[216],"end-to-end":[218],"compared":[220],"achieves":[227],"$6.38":[230],"\\sim":[231],"7.05":[232],"speedup":[234],"over":[235],"ASICbased":[237],"accelerators":[238],"CogVideoX-2B":[241],"5B":[243],"models.":[244]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
