{"id":"https://openalex.org/W4414197093","doi":"https://doi.org/10.1109/dac63849.2025.11132610","title":"ReaLM: Reliable and Efficient Large Language Model Inference with Statistical Algorithm-Based Fault Tolerance","display_name":"ReaLM: Reliable and Efficient Large Language Model Inference with Statistical Algorithm-Based Fault Tolerance","publication_year":2025,"publication_date":"2025-06-22","ids":{"openalex":"https://openalex.org/W4414197093","doi":"https://doi.org/10.1109/dac63849.2025.11132610"},"language":"en","primary_location":{"id":"doi:10.1109/dac63849.2025.11132610","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac63849.2025.11132610","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 62nd ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113124505","display_name":"Tong Xie","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Tong Xie","raw_affiliation_strings":["Peking University,School of Integrated Circuits,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Peking University,School of Integrated Circuits,Beijing,China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081892558","display_name":"Jiawang Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiawang Zhao","raw_affiliation_strings":["Institute for Artificial Intelligence,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Institute for Artificial Intelligence,Beijing,China","institution_ids":["https://openalex.org/I4210100255"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076856438","display_name":"Zishen Wan","orcid":"https://orcid.org/0000-0002-2982-5351"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zishen Wan","raw_affiliation_strings":["Georgia Institute of Technology,Atlanta,GA,USA"],"affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology,Atlanta,GA,USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045601421","display_name":"Zuodong Zhang","orcid":"https://orcid.org/0000-0002-8496-6114"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zuodong Zhang","raw_affiliation_strings":["Peking University,School of Integrated Circuits,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Peking University,School of Integrated Circuits,Beijing,China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100681377","display_name":"Yuan Wang","orcid":"https://orcid.org/0000-0002-8559-2441"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuan Wang","raw_affiliation_strings":["Peking University,School of Integrated Circuits,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Peking University,School of Integrated Circuits,Beijing,China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002760019","display_name":"Runsheng Wang","orcid":"https://orcid.org/0000-0002-7514-0767"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Runsheng Wang","raw_affiliation_strings":["Peking University,School of Integrated Circuits,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Peking University,School of Integrated Circuits,Beijing,China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050500375","display_name":"Ru Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ru Huang","raw_affiliation_strings":["Peking University,School of Integrated Circuits,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Peking University,School of Integrated Circuits,Beijing,China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100457556","display_name":"Meng Li","orcid":"https://orcid.org/0000-0003-2123-2444"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Meng Li","raw_affiliation_strings":["Institute for Artificial Intelligence,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Institute for Artificial Intelligence,Beijing,China","institution_ids":["https://openalex.org/I4210100255"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5113124505"],"corresponding_institution_ids":["https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":2.3567,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.90740979,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9465000033378601,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9465000033378601,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11396","display_name":"Artificial Intelligence in Healthcare","score":0.9204999804496765,"subfield":{"id":"https://openalex.org/subfields/3605","display_name":"Health Information Management"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.9031000137329102,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/fault-tolerance","display_name":"Fault tolerance","score":0.6424999833106995},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6218000054359436},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6201000213623047},{"id":"https://openalex.org/keywords/error-detection-and-correction","display_name":"Error detection and correction","score":0.5763000249862671},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.49000000953674316},{"id":"https://openalex.org/keywords/perplexity","display_name":"Perplexity","score":0.4715000092983246},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4544999897480011},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.3228999972343445},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.3183000087738037}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7315000295639038},{"id":"https://openalex.org/C63540848","wikidata":"https://www.wikidata.org/wiki/Q3140932","display_name":"Fault tolerance","level":2,"score":0.6424999833106995},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6218000054359436},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6201000213623047},{"id":"https://openalex.org/C103088060","wikidata":"https://www.wikidata.org/wiki/Q1062839","display_name":"Error detection and correction","level":2,"score":0.5763000249862671},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.49000000953674316},{"id":"https://openalex.org/C200601418","wikidata":"https://www.wikidata.org/wiki/Q2193887","display_name":"Reliability engineering","level":1,"score":0.48339998722076416},{"id":"https://openalex.org/C100279451","wikidata":"https://www.wikidata.org/wiki/Q372193","display_name":"Perplexity","level":3,"score":0.4715000092983246},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4544999897480011},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3723999857902527},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.3228999972343445},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.3183000087738037},{"id":"https://openalex.org/C167391956","wikidata":"https://www.wikidata.org/wiki/Q1401211","display_name":"Fault model","level":3,"score":0.3138999938964844},{"id":"https://openalex.org/C152745839","wikidata":"https://www.wikidata.org/wiki/Q5438153","display_name":"Fault detection and isolation","level":3,"score":0.30559998750686646},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.2906999886035919},{"id":"https://openalex.org/C167981075","wikidata":"https://www.wikidata.org/wiki/Q2667186","display_name":"Sandbox (software development)","level":2,"score":0.288100004196167},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.2816999852657318},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.2775000035762787},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.27639999985694885},{"id":"https://openalex.org/C2775928411","wikidata":"https://www.wikidata.org/wiki/Q2041312","display_name":"Fault injection","level":3,"score":0.2734000086784363},{"id":"https://openalex.org/C126953365","wikidata":"https://www.wikidata.org/wiki/Q5438152","display_name":"Fault coverage","level":3,"score":0.27000001072883606},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.2678999900817871},{"id":"https://openalex.org/C134261354","wikidata":"https://www.wikidata.org/wiki/Q938438","display_name":"Statistical inference","level":2,"score":0.2590000033378601},{"id":"https://openalex.org/C50712370","wikidata":"https://www.wikidata.org/wiki/Q4269346","display_name":"Software fault tolerance","level":3,"score":0.25780001282691956},{"id":"https://openalex.org/C13625343","wikidata":"https://www.wikidata.org/wiki/Q7627418","display_name":"Stuck-at fault","level":4,"score":0.2574999928474426}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/dac63849.2025.11132610","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac63849.2025.11132610","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 62nd ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W1980073965","https://openalex.org/W2010922160","https://openalex.org/W2058373514","https://openalex.org/W2083613288","https://openalex.org/W2507722844","https://openalex.org/W2612733213","https://openalex.org/W2767260595","https://openalex.org/W2772488910","https://openalex.org/W2791673912","https://openalex.org/W2795147506","https://openalex.org/W2808976322","https://openalex.org/W2809188712","https://openalex.org/W2888482885","https://openalex.org/W2903175372","https://openalex.org/W2920866063","https://openalex.org/W2946522000","https://openalex.org/W2946609015","https://openalex.org/W2946682676","https://openalex.org/W2953212265","https://openalex.org/W2963015836","https://openalex.org/W2963396341","https://openalex.org/W2963640628","https://openalex.org/W3011997511","https://openalex.org/W3013918852","https://openalex.org/W3091833323","https://openalex.org/W3105084904","https://openalex.org/W3171842021","https://openalex.org/W3182269065","https://openalex.org/W3189398641","https://openalex.org/W3213097409","https://openalex.org/W4211189626","https://openalex.org/W4236432903","https://openalex.org/W4238485759","https://openalex.org/W4242565052","https://openalex.org/W4321608012","https://openalex.org/W4379116028","https://openalex.org/W4385453267","https://openalex.org/W4386212580","https://openalex.org/W4388212323","https://openalex.org/W4388624644","https://openalex.org/W4389166676","https://openalex.org/W4394998734"],"related_works":[],"abstract_inverted_index":{"The":[0],"demand":[1],"for":[2],"efficient":[3,71],"large":[4,34],"language":[5,110],"model":[6],"(LLM)":[7],"inference":[8,47],"has":[9],"propelled":[10],"the":[11,54,87,93,123,137],"development":[12],"of":[13,58,96,105,147],"dedicated":[14],"accelerators.":[15],"As":[16],"accelerators":[17],"are":[18],"vulnerable":[19],"to":[20,24,44,61,126,141,171,174,192],"hardware":[21],"faults":[22],"due":[23],"aging,":[25],"variation,":[26],"etc,":[27],"existing":[28,175],"accelerator":[29],"designs":[30],"often":[31,52],"reserve":[32],"a":[33,79,100,116,143],"voltage":[35],"margin":[36],"or":[37],"leverage":[38],"algorithm-based":[39],"fault":[40,56,94],"tolerance":[41,57,95],"(ABFT)":[42],"techniques":[43],"ensure":[45],"LLM":[46,72,196],"correctness.":[48],"However,":[49],"previous":[50],"methods":[51],"overlook":[53],"inherent":[55],"LLMs,":[59],"leading":[60],"high":[62],"computation":[63],"and":[64,108,159,186],"energy":[65,188],"overhead.":[66],"To":[67],"enable":[68,142],"reliable":[69],"yet":[70],"inference,":[73],"in":[74],"this":[75],"paper,":[76],"we":[77,90,114],"propose":[78,115],"novel":[80],"algorithm/circuit":[81],"co-design":[82],"framework,":[83],"dubbed":[84],"ReaLM.":[85],"For":[86],"first":[88],"time,":[89],"systematically":[91],"characterize":[92],"LLMs":[97,107],"by":[98,190],"performing":[99],"large-scale":[101],"error":[102,124,128,138,148,199],"injection":[103,200],"study":[104],"representative":[106],"natural":[109],"understanding":[111],"tasks.":[112],"Then,":[113],"statistical":[117],"ABFT":[118],"algorithm":[119],"that":[120,153],"fully":[121],"leverages":[122],"robustness":[125],"minimize":[127],"recovery":[129,180],"as":[130,132],"much":[131],"possible.":[133],"We":[134],"also":[135],"customize":[136],"detection":[139],"circuits":[140],"low-cost":[144],"online":[145],"collection":[146],"statistics.":[149],"Extensive":[150],"experiments":[151],"show":[152],"with":[154],"only":[155],"1.42%":[156],"circuit":[157],"area":[158],"1.79%":[160],"power":[161],"overhead,":[162],"our":[163],"ReaLM":[164,177],"can":[165],"reduce":[166],"perplexity":[167],"degradation":[168],"from":[169],"18.54":[170],"0.29.":[172],"Compared":[173],"methods,":[176],"consistently":[178],"reduces":[179],"costs":[181],"across":[182],"different":[183],"operating":[184],"voltages":[185],"improves":[187],"efficiency":[189],"up":[191],"35.83%":[193],"without":[194],"compromising":[195],"performance.":[197],"Our":[198],"code":[201],"is":[202],"available":[203],"at":[204],"https://github.com/PKU-SEC-Lab/ReaLM_DAC25/.":[205]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}
