{"id":"https://openalex.org/W4409248601","doi":"https://doi.org/10.1109/hpca61900.2025.00111","title":"LAD: Efficient Accelerator for Generative Inference of LLM with Locality Aware Decoding","display_name":"LAD: Efficient Accelerator for Generative Inference of LLM with Locality Aware Decoding","publication_year":2025,"publication_date":"2025-03-01","ids":{"openalex":"https://openalex.org/W4409248601","doi":"https://doi.org/10.1109/hpca61900.2025.00111"},"language":"en","primary_location":{"id":"doi:10.1109/hpca61900.2025.00111","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca61900.2025.00111","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100324824","display_name":"Haoran Wang","orcid":"https://orcid.org/0000-0002-4622-0119"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Haoran Wang","raw_affiliation_strings":["Institute of Computing Technology,Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology,Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015161455","display_name":"Yuming Li","orcid":"https://orcid.org/0009-0004-4414-6682"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuming Li","raw_affiliation_strings":["Institute of Computing Technology,Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology,Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044810210","display_name":"Haobo Xu","orcid":"https://orcid.org/0000-0002-0243-6516"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haobo Xu","raw_affiliation_strings":["Institute of Computing Technology,Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology,Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Ying Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ying Wang","raw_affiliation_strings":["Institute of Computing Technology,Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology,Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002557170","display_name":"Liqi Liu","orcid":"https://orcid.org/0000-0002-1759-2300"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liqi Liu","raw_affiliation_strings":["Institute of Computing Technology,Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology,Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101801967","display_name":"Jun Yang","orcid":"https://orcid.org/0000-0001-7888-2187"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Yang","raw_affiliation_strings":["Institute of Computing Technology,Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology,Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101512833","display_name":"Yinhe Han","orcid":"https://orcid.org/0000-0001-5113-8067"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yinhe Han","raw_affiliation_strings":["Institute of Computing Technology,Chinese Academy of Sciences"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology,Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5100324824"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210090176"],"apc_list":null,"apc_paid":null,"fwci":8.8359,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.97077203,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1482","last_page":"1495"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13999","display_name":"Digital Rights Management and Security","score":0.9678000211715698,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13999","display_name":"Digital Rights Management and Security","score":0.9678000211715698,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.8425319194793701},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.743939220905304},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.5924264192581177},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5916607975959778},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.5485197305679321},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2791505753993988},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.20579937100410461}],"concepts":[{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.8425319194793701},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.743939220905304},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.5924264192581177},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5916607975959778},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.5485197305679321},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2791505753993988},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.20579937100410461},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpca61900.2025.00111","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca61900.2025.00111","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":49,"referenced_works":["https://openalex.org/W2022395113","https://openalex.org/W2034861439","https://openalex.org/W2115605495","https://openalex.org/W2890894339","https://openalex.org/W2963015836","https://openalex.org/W2963536265","https://openalex.org/W2997284037","https://openalex.org/W3017024317","https://openalex.org/W3047848469","https://openalex.org/W3130240120","https://openalex.org/W3159727696","https://openalex.org/W3189877953","https://openalex.org/W3206453033","https://openalex.org/W4308083513","https://openalex.org/W4308083526","https://openalex.org/W4321636575","https://openalex.org/W4360831803","https://openalex.org/W4385245566","https://openalex.org/W4387321091","https://openalex.org/W4393578753","https://openalex.org/W4393949386","https://openalex.org/W4395112660","https://openalex.org/W6719768283","https://openalex.org/W6727099177","https://openalex.org/W6754244489","https://openalex.org/W6768817161","https://openalex.org/W6769692749","https://openalex.org/W6778883912","https://openalex.org/W6782879696","https://openalex.org/W6791481479","https://openalex.org/W6798182279","https://openalex.org/W6803096969","https://openalex.org/W6809885388","https://openalex.org/W6811340617","https://openalex.org/W6838322825","https://openalex.org/W6838633097","https://openalex.org/W6846164622","https://openalex.org/W6846659131","https://openalex.org/W6849514112","https://openalex.org/W6849765749","https://openalex.org/W6850927664","https://openalex.org/W6853192989","https://openalex.org/W6854308872","https://openalex.org/W6854866820","https://openalex.org/W6855970221","https://openalex.org/W6857690716","https://openalex.org/W6857799723","https://openalex.org/W6866033557","https://openalex.org/W6893640197"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W1556451512","https://openalex.org/W1555349535","https://openalex.org/W2380075625","https://openalex.org/W2390279801","https://openalex.org/W4234091740","https://openalex.org/W4391913857","https://openalex.org/W4213350282"],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"have":[4,189],"emerged":[5],"as":[6,45],"the":[7,25,34,41,52,56,74,85,90,135,145,156,166,199,203,236,240,247,266,271],"cornerstone":[8],"of":[9,84,201,229,242,250],"content":[10],"generation":[11,43,69,109],"applications":[12],"due":[13],"to":[14,17,153,164,232,270],"their":[15],"ability":[16,31],"capture":[18],"relations":[19],"between":[20],"newly":[21],"generated":[22,53,234],"token":[23],"and":[24,124,159,205,260,286],"full":[26],"preceding":[27],"context.":[28],"However,":[29],"this":[30,98,100],"stems":[32],"from":[33],"attention":[35,75,136,179,193,267],"mechanism":[36,76,268],"for":[37,77,80,138,207,265],"decoding":[38,78,148,183,194,215],"that":[39,114,133,171,197,220],"retains":[40],"entire":[42],"history":[44],"key":[46,128,158,204],"value":[47,160,206],"cache":[48,58,92,118,244],"(KV":[49],"cache).":[50],"As":[51],"sequence":[54],"lengthens,":[55],"KV":[57,91,117,243],"expands,":[59],"causing":[60],"a":[61,139],"substantial":[62],"memory":[63],"access":[64],"bottleneck.":[65],"In":[66],"advanced":[67],"LLM":[68,108],"systems":[70],"running":[71],"on":[72,254,281],"GPUs,":[73],"accounts":[79],"more":[81],"than":[82],"50%":[83],"total":[86],"inference":[87],"time":[88],"when":[89,134],"length":[93,241],"reaches":[94],"4096.":[95],"To":[96],"address":[97],"issue,":[99],"paper":[101],"introduces":[102],"LAD":[103,131,221,251],"(Locality":[104],"Aware":[105],"Decoding),":[106],"an":[107,191,225],"accelerator":[110,252],"with":[111,224],"algorithm-hardware":[112],"enhancements":[113],"significantly":[115],"decrease":[116],"access,":[119],"resulting":[120],"in":[121,178],"considerable":[122],"speedups":[123],"energy":[125,263,289],"savings.":[126],"A":[127],"insight":[129],"underlying":[130],"is":[132,151],"score":[137],"specific":[140],"position":[141],"remains":[142],"fixed":[143],"over":[144],"next":[146],"several":[147],"steps,":[149],"it":[150,278],"unnecessary":[152],"repeatedly":[154],"retrieve":[155],"associated":[157],"at":[161],"each":[162],"step":[163],"reproduce":[165],"computation.":[167],"Our":[168],"analysis":[169],"reveals":[170],"numerous":[172],"positions":[173,208],"exhibit":[174],"notable":[175],"numerical":[176],"locality":[177],"scores":[180],"through":[181],"multiple":[182],"steps.":[184],"Leveraging":[185],"these":[186],"insights,":[187],"we":[188],"designed":[190],"innovative":[192],"computation":[195],"method":[196],"decreases":[198],"frequency":[200],"accessing":[202],"demonstrating":[209],"good":[210],"locality,":[211],"all":[212],"while":[213],"maintaining":[214],"accuracy.":[216],"Extensive":[217],"experiments":[218],"show":[219],"generates":[222],"sequences":[223],"average":[226,255,282],"ROUGE-1":[227],"similarity":[228],"97%":[230],"compared":[231,269],"those":[233],"by":[235],"original":[237],"model.":[238],"When":[239],"exceeds":[245],"2048,":[246],"high":[248],"configuration":[249],"achieves":[253,280],"(geomean)":[256],"$10.7":[257],"\\times$":[258,262,284,288],"speedup":[259,285],"$52.4":[261],"efficiency":[264],"A100":[272],"GPU.":[273],"For":[274],"end-to-end":[275],"model":[276],"inference,":[277],"also":[279],"$2.3":[283],"$13.4":[287],"efficiency.":[290]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
