{"id":"https://openalex.org/W7133504784","doi":"https://doi.org/10.1109/hpca68181.2026.11408448","title":"PADE: A Predictor-Free Sparse Attention Accelerator via Unified Execution and Stage Fusion","display_name":"PADE: A Predictor-Free Sparse Attention Accelerator via Unified Execution and Stage Fusion","publication_year":2026,"publication_date":"2026-01-31","ids":{"openalex":"https://openalex.org/W7133504784","doi":"https://doi.org/10.1109/hpca68181.2026.11408448"},"language":null,"primary_location":{"id":"doi:10.1109/hpca68181.2026.11408448","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca68181.2026.11408448","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128126107","display_name":"Huizheng Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Huizheng Wang","raw_affiliation_strings":["School of Integrated Circuits, BNRist, Tsinghua University,Beijing,China,100084"],"affiliations":[{"raw_affiliation_string":"School of Integrated Circuits, BNRist, Tsinghua University,Beijing,China,100084","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121176129","display_name":"Hongbin Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongbin Wang","raw_affiliation_strings":["School of Integrated Circuits, BNRist, Tsinghua University,Beijing,China,100084"],"affiliations":[{"raw_affiliation_string":"School of Integrated Circuits, BNRist, Tsinghua University,Beijing,China,100084","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121146620","display_name":"Zichuan Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zichuan Wang","raw_affiliation_strings":["School of Integrated Circuits, BNRist, Tsinghua University,Beijing,China,100084"],"affiliations":[{"raw_affiliation_string":"School of Integrated Circuits, BNRist, Tsinghua University,Beijing,China,100084","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009692777","display_name":"Zhiheng Yue","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiheng Yue","raw_affiliation_strings":["School of Integrated Circuits, BNRist, Tsinghua University,Beijing,China,100084"],"affiliations":[{"raw_affiliation_string":"School of Integrated Circuits, BNRist, Tsinghua University,Beijing,China,100084","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128113509","display_name":"Yang Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Wang","raw_affiliation_strings":["School of Integrated Circuits, BNRist, Tsinghua University,Beijing,China,100084"],"affiliations":[{"raw_affiliation_string":"School of Integrated Circuits, BNRist, Tsinghua University,Beijing,China,100084","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007581833","display_name":"Chang Li","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chao Li","raw_affiliation_strings":["School of Computer Science and Engineering, Shanghai Jiao Tong University,Shanghai,China,200240"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Shanghai Jiao Tong University,Shanghai,China,200240","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068155662","display_name":"Yuanming Hu","orcid":"https://orcid.org/0000-0002-1136-9909"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Hu","raw_affiliation_strings":["School of Integrated Circuits, BNRist, Tsinghua University,Beijing,China,100084"],"affiliations":[{"raw_affiliation_string":"School of Integrated Circuits, BNRist, Tsinghua University,Beijing,China,100084","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5078843314","display_name":"Shouyi Yin","orcid":"https://orcid.org/0000-0002-8438-8588"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shouyi Yin","raw_affiliation_strings":["School of Integrated Circuits, BNRist, Tsinghua University,Beijing,China,100084"],"affiliations":[{"raw_affiliation_string":"School of Integrated Circuits, BNRist, Tsinghua University,Beijing,China,100084","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5128126107"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.53324428,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"19"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.25200000405311584,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.25200000405311584,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.12610000371932983,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.10909999907016754,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/stage","display_name":"Stage (stratigraphy)","score":0.43230000138282776},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.36910000443458557},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.33489999175071716},{"id":"https://openalex.org/keywords/sensor-fusion","display_name":"Sensor fusion","score":0.314300000667572},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.289900004863739},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.2808000147342682}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6297000050544739},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44749999046325684},{"id":"https://openalex.org/C146357865","wikidata":"https://www.wikidata.org/wiki/Q1123245","display_name":"Stage (stratigraphy)","level":2,"score":0.43230000138282776},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.36910000443458557},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.33489999175071716},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.314300000667572},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2928999960422516},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.289900004863739},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.28130000829696655},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2808000147342682},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.2669000029563904},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.2644999921321869},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.258899986743927}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpca68181.2026.11408448","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca68181.2026.11408448","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1187591493","display_name":null,"funder_award_id":"2022ZD0115200","funder_id":"https://openalex.org/F4320329860","funder_display_name":"National Science and Technology Major Project"},{"id":"https://openalex.org/G3145010481","display_name":null,"funder_award_id":"62125403,U24A20234,92464302,U24B20164","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320329860","display_name":"National Science and Technology Major Project","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":107,"referenced_works":["https://openalex.org/W2034861439","https://openalex.org/W2108598243","https://openalex.org/W2260498192","https://openalex.org/W2418958843","https://openalex.org/W2431931973","https://openalex.org/W2767737961","https://openalex.org/W2794141774","https://openalex.org/W2794478957","https://openalex.org/W2913573286","https://openalex.org/W2931118404","https://openalex.org/W2949870694","https://openalex.org/W2962843773","https://openalex.org/W2970777192","https://openalex.org/W2979310060","https://openalex.org/W2979439447","https://openalex.org/W2979719709","https://openalex.org/W2979747168","https://openalex.org/W2979826702","https://openalex.org/W2980113464","https://openalex.org/W3011554625","https://openalex.org/W3016542674","https://openalex.org/W3016904661","https://openalex.org/W3017024317","https://openalex.org/W3047848469","https://openalex.org/W3102587717","https://openalex.org/W3103168911","https://openalex.org/W3105802176","https://openalex.org/W3125180045","https://openalex.org/W3131500599","https://openalex.org/W3134893068","https://openalex.org/W3135160281","https://openalex.org/W3158233278","https://openalex.org/W3158543914","https://openalex.org/W3158831985","https://openalex.org/W3159727696","https://openalex.org/W3159782774","https://openalex.org/W3185702163","https://openalex.org/W3187481008","https://openalex.org/W3187908937","https://openalex.org/W3189877953","https://openalex.org/W3190761184","https://openalex.org/W3194676777","https://openalex.org/W3205192296","https://openalex.org/W3206290655","https://openalex.org/W3206453033","https://openalex.org/W3207265322","https://openalex.org/W3210059290","https://openalex.org/W3210117238","https://openalex.org/W4206223617","https://openalex.org/W4211076402","https://openalex.org/W4214686755","https://openalex.org/W4224267386","https://openalex.org/W4236868170","https://openalex.org/W4247198796","https://openalex.org/W4280557024","https://openalex.org/W4281708367","https://openalex.org/W4281800613","https://openalex.org/W4285236966","https://openalex.org/W4291653336","https://openalex.org/W4293025165","https://openalex.org/W4296473245","https://openalex.org/W4308083513","https://openalex.org/W4308083526","https://openalex.org/W4308083753","https://openalex.org/W4308083827","https://openalex.org/W4308083928","https://openalex.org/W4313007769","https://openalex.org/W4313467238","https://openalex.org/W4319988867","https://openalex.org/W4360831795","https://openalex.org/W4360831803","https://openalex.org/W4360831844","https://openalex.org/W4360831992","https://openalex.org/W4360832001","https://openalex.org/W4375928954","https://openalex.org/W4377115595","https://openalex.org/W4380875572","https://openalex.org/W4380881077","https://openalex.org/W4386765251","https://openalex.org/W4387789741","https://openalex.org/W4389476299","https://openalex.org/W4392450088","https://openalex.org/W4392567268","https://openalex.org/W4393406875","https://openalex.org/W4393407021","https://openalex.org/W4399487278","https://openalex.org/W4401211845","https://openalex.org/W4404133659","https://openalex.org/W4404133752","https://openalex.org/W4404133870","https://openalex.org/W4404954358","https://openalex.org/W4404954664","https://openalex.org/W4405220108","https://openalex.org/W4406214400","https://openalex.org/W4409248488","https://openalex.org/W4409248495","https://openalex.org/W4409248530","https://openalex.org/W4409248709","https://openalex.org/W4411171608","https://openalex.org/W4411486557","https://openalex.org/W4413017445","https://openalex.org/W4415797154","https://openalex.org/W7092191477","https://openalex.org/W7133184517","https://openalex.org/W7133198810","https://openalex.org/W7133484568","https://openalex.org/W7133484784"],"related_works":[],"abstract_inverted_index":{"Attention-based":[0],"models":[1],"have":[2],"revolutionized":[3],"AI,":[4],"but":[5],"the":[6,34,50,63,98],"quadratic":[7],"cost":[8],"of":[9,37],"self-attention":[10],"incurs":[11],"severe":[12],"computational":[13,159],"and":[14,89,158,194,218,228],"memory":[15],"overhead.":[16],"Sparse":[17],"attention":[18,115,152],"methods":[19],"alleviate":[20],"this":[21],"by":[22,53,97],"skipping":[23],"low-relevance":[24],"token":[25],"pairs.":[26],"However,":[27,69],"current":[28],"approaches":[29],"lack":[30],"practicality":[31],"due":[32,86],"to":[33,80,87,130,145,154,207],"heavy":[35],"expense":[36],"added":[38,176],"sparsity":[39,77,102,170,177],"predictor,":[40],"which":[41,61],"severely":[42],"drops":[43],"their":[44],"hardware":[45,147],"efficiency.":[46],"This":[47],"paper":[48],"advances":[49],"state-of-the-art":[51],"(SOTA)":[52],"proposing":[54],"a":[55,66,108],"bit-serial":[56],"enable":[57,168],"stage-fusion":[58],"(BSF)":[59],"mechanism,":[60],"eliminates":[62],"need":[64],"for":[65,112],"separate":[67],"predictor.":[68,178],"it":[70],"faces":[71],"key":[72,120],"challenges:":[73],"1)":[74,122],"Inaccurate":[75],"bit-sliced":[76],"speculation":[78],"leads":[79],"incorrect":[81],"pruning;":[82],"2)":[83,139],"Hardware":[84],"under-utilization":[85],"finegrained":[88],"imbalanced":[90],"bit-level":[91],"workloads.":[92],"3)":[93,149],"Tiling":[94],"difficulty":[95],"caused":[96],"row-wise":[99],"dependency":[100],"in":[101],"pruning":[103],"criteria.":[104],"We":[105],"propose":[106],"PADE,":[107],"predictor-free":[109],"algorithm-hardware":[110],"codesign":[111],"dynamic":[113],"sparse":[114],"acceleration.":[116],"PADE":[117,186,210],"features":[118],"three":[119],"innovations:":[121],"Bit-wise":[123],"uncertainty":[124],"interval-enabled":[125],"guard":[126],"filtering":[127],"(BUI-GF)":[128],"strategy":[129],"accurately":[131],"identify":[132],"trivial":[133],"tokens":[134],"during":[135],"each":[136],"bit":[137],"round;":[138],"Bidirectional":[140],"sparsity-based":[141],"out-of-order":[142],"execution":[143],"(BS-OOE)":[144],"improve":[146],"utilization;":[148],"Interleaving-based":[150],"sparsity-tiled":[151],"(ISTA)":[153],"reduce":[155],"both":[156],"I/O":[157],"complexity.":[160],"These":[161],"techniques,":[162],"combined":[163],"with":[164],"custom":[165],"accelerator":[166],"designs,":[167],"practical":[169],"acceleration":[171],"without":[172],"relying":[173],"on":[174,181],"an":[175],"Extensive":[179],"experiments":[180],"22":[182],"benchmarks":[183],"show":[184],"that":[185],"achieves":[187,211],"<tex":[188,195,212,219],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[189,196,213,220],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$7.43":[190],"\\times$</tex>":[191,198,217,222],"speed":[192],"up":[193],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$31.1":[197],"higher":[199],"energy":[200,223],"efficiency":[201],"than":[202,225],"Nvidia":[203],"H100":[204],"GPU.":[205],"Compared":[206],"SOTA":[208],"accelerators,":[209],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$5.1":[214],"\\times,":[215],"4.3":[216],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$3.4":[221],"saving":[224],"Sanger,":[226],"DOTA":[227],"SOFA.":[229]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2026-03-05T00:00:00"}
