{"id":"https://openalex.org/W4414197321","doi":"https://doi.org/10.1109/dac63849.2025.11133343","title":"DIAS: Distance-based Attention Sparsity for Ultra-Long-Sequence Transformer with Tree-like Processing-in-Memory Architecture","display_name":"DIAS: Distance-based Attention Sparsity for Ultra-Long-Sequence Transformer with Tree-like Processing-in-Memory Architecture","publication_year":2025,"publication_date":"2025-06-22","ids":{"openalex":"https://openalex.org/W4414197321","doi":"https://doi.org/10.1109/dac63849.2025.11133343"},"language":"en","primary_location":{"id":"doi:10.1109/dac63849.2025.11133343","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac63849.2025.11133343","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 62nd ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Zekai Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zekai Chen","raw_affiliation_strings":["LEFT/BNRist/SKLSNC, Tsinghua University,Department of Electronic Engineering,Beijing,China"],"affiliations":[{"raw_affiliation_string":"LEFT/BNRist/SKLSNC, Tsinghua University,Department of Electronic Engineering,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100333353","display_name":"Yiming Chen","orcid":"https://orcid.org/0000-0002-1408-5194"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yiming Chen","raw_affiliation_strings":["LEFT/BNRist/SKLSNC, Tsinghua University,Department of Electronic Engineering,Beijing,China"],"affiliations":[{"raw_affiliation_string":"LEFT/BNRist/SKLSNC, Tsinghua University,Department of Electronic Engineering,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022116721","display_name":"Teng Wan","orcid":"https://orcid.org/0009-0003-8142-1121"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Teng Wan","raw_affiliation_strings":["LEFT/BNRist/SKLSNC, Tsinghua University,Department of Electronic Engineering,Beijing,China"],"affiliations":[{"raw_affiliation_string":"LEFT/BNRist/SKLSNC, Tsinghua University,Department of Electronic Engineering,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041252864","display_name":"Tianyi Yu","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianyi Yu","raw_affiliation_strings":["LEFT/BNRist/SKLSNC, Tsinghua University,Department of Electronic Engineering,Beijing,China"],"affiliations":[{"raw_affiliation_string":"LEFT/BNRist/SKLSNC, Tsinghua University,Department of Electronic Engineering,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100701612","display_name":"Yu Wang","orcid":"https://orcid.org/0000-0002-9173-1209"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yu Wang","raw_affiliation_strings":["LEFT/BNRist/SKLSNC, Tsinghua University,Department of Electronic Engineering,Beijing,China"],"affiliations":[{"raw_affiliation_string":"LEFT/BNRist/SKLSNC, Tsinghua University,Department of Electronic Engineering,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103867707","display_name":"Huazhong Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huazhong Yang","raw_affiliation_strings":["LEFT/BNRist/SKLSNC, Tsinghua University,Department of Electronic Engineering,Beijing,China"],"affiliations":[{"raw_affiliation_string":"LEFT/BNRist/SKLSNC, Tsinghua University,Department of Electronic Engineering,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100764674","display_name":"Xueqing Li","orcid":"https://orcid.org/0000-0002-8051-3345"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xueqing Li","raw_affiliation_strings":["LEFT/BNRist/SKLSNC, Tsinghua University,Department of Electronic Engineering,Beijing,China"],"affiliations":[{"raw_affiliation_string":"LEFT/BNRist/SKLSNC, Tsinghua University,Department of Electronic Engineering,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.25427043,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9865999817848206,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11992","display_name":"CCD and CMOS Imaging Sensors","score":0.9825000166893005,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.6834999918937683},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6557999849319458},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6297000050544739},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5866000056266785},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.564300000667572},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.4512999951839447},{"id":"https://openalex.org/keywords/memory-architecture","display_name":"Memory architecture","score":0.43639999628067017},{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.3880999982357025}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.800599992275238},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.6834999918937683},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6557999849319458},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6297000050544739},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5866000056266785},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.564300000667572},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.4512999951839447},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.44429999589920044},{"id":"https://openalex.org/C2779602883","wikidata":"https://www.wikidata.org/wiki/Q15544750","display_name":"Memory architecture","level":2,"score":0.43639999628067017},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.3880999982357025},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3741999864578247},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.36640000343322754},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.36559998989105225},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.35499998927116394},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.34940001368522644},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.34450000524520874},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.28949999809265137},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.2838999927043915},{"id":"https://openalex.org/C63511323","wikidata":"https://www.wikidata.org/wiki/Q908936","display_name":"Interleaved memory","level":4,"score":0.2824999988079071},{"id":"https://openalex.org/C3720319","wikidata":"https://www.wikidata.org/wiki/Q5015937","display_name":"Cache-only memory architecture","level":5,"score":0.2775999903678894},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.2750000059604645},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2732999920845032},{"id":"https://openalex.org/C98025372","wikidata":"https://www.wikidata.org/wiki/Q477538","display_name":"Systems architecture","level":3,"score":0.2556999921798706}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/dac63849.2025.11133343","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac63849.2025.11133343","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 62nd ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W2124509324","https://openalex.org/W2138662031","https://openalex.org/W2161449253","https://openalex.org/W2963265099","https://openalex.org/W2963469388","https://openalex.org/W3043840704","https://openalex.org/W3111375540","https://openalex.org/W3138516171","https://openalex.org/W3160050461","https://openalex.org/W4280496502","https://openalex.org/W4386763970","https://openalex.org/W4389518760","https://openalex.org/W4392427708","https://openalex.org/W4394998968","https://openalex.org/W4395106409","https://openalex.org/W4400909749","https://openalex.org/W4401212085","https://openalex.org/W4402671766","https://openalex.org/W4404133752"],"related_works":[],"abstract_inverted_index":{"Long-context":[0],"inference":[1,101],"has":[2,41],"become":[3],"a":[4,45,79,107],"central":[5],"focus":[6],"in":[7,16],"recent":[8],"self-regressive":[9],"Transformer":[10],"research.":[11],"However,":[12],"challenges":[13],"still":[14],"remain":[15],"performing":[17],"decode":[18],"stage":[19],"due":[20],"to":[21,63,99,115,153],"the":[22,30,69,132,159],"memory":[23,32,118,126],"bandwidth":[24,121],"bottleneck":[25],"of":[26,139,166],"attention":[27,39,82,93],"mechanisms":[28],"and":[29,73,120,173],"substantial":[31],"overhead":[33],"associated":[34],"with":[35,85,143,146,158,163],"KV":[36,129],"cache.":[37],"Although":[38],"sparsity":[40,49,83],"been":[42],"proposed":[43],"as":[44],"potential":[46],"solution,":[47],"conventional":[48],"methods":[50],"that":[51],"rely":[52],"on":[53,136],"heuristic":[54],"algorithms":[55],"often":[56],"suffer":[57],"from":[58],"accuracy":[59,164],"degradation":[60],"when":[61],"applied":[62],"ultra-long":[64],"sequences.":[65],"To":[66],"break":[67],"through":[68,96],"dilemma":[70],"between":[71],"accuracy-performance":[72],"bandwidth-capacity,":[74],"this":[75],"work":[76],"proposes":[77],"DIAS,":[78],"distancebased":[80],"irregular":[81],"approach":[84],"processing-inmemory":[86],"(PIM)":[87],"architecture.":[88],"DIAS":[89,140],"employs":[90],"approximate":[91],"topK":[92],"(AKAttention)":[94],"scores":[95],"graph-based":[97],"search":[98],"improve":[100],"efficiency":[102],"while":[103],"maintaining":[104],"accuracy.":[105],"Furthermore,":[106],"scalable":[108],"tree-like":[109],"PIM":[110,133],"(TreePIM)":[111],"architecture":[112],"is":[113],"introduced":[114],"achieve":[116],"both":[117],"capacity":[119],"improvement":[122],"by":[123],"isolating":[124],"enormous":[125],"access":[127],"for":[128,141],"cache":[130],"into":[131],"units.":[134],"Evaluations":[135],"various":[137],"configurations":[138],"Longbench":[142],"Llama3-405B":[144],"models":[145],"1":[147],"M":[148],"sequence":[149],"length":[150],"show":[151],"up":[152],"75":[154],"times":[155],"speedup":[156],"compared":[157],"state-of-the-art":[160],"LLM":[161],"accelerator,":[162],"drop":[165],"less":[167],"than":[168],"$1":[169],"\\%$.":[170],"Index":[171],"Terms-AI":[172],"Machine":[174],"Learning,":[175],"Architecture":[176],"&":[177],"System":[178],"Design":[179]},"counts_by_year":[],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
