{"id":"https://openalex.org/W4411486373","doi":"https://doi.org/10.1145/3695053.3731073","title":"AiF: Accelerating On-Device LLM Inference Using In-Flash Processing","display_name":"AiF: Accelerating On-Device LLM Inference Using In-Flash Processing","publication_year":2025,"publication_date":"2025-06-20","ids":{"openalex":"https://openalex.org/W4411486373","doi":"https://doi.org/10.1145/3695053.3731073"},"language":"en","primary_location":{"id":"doi:10.1145/3695053.3731073","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3695053.3731073","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3695053.3731073","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 52nd Annual International Symposium on Computer Architecture","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3695053.3731073","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100369462","display_name":"Jaeyong Lee","orcid":"https://orcid.org/0000-0002-2724-8888"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Jaeyong Lee","raw_affiliation_strings":["Seoul National University, Seoul, Republic of Korea"],"raw_orcid":"https://orcid.org/0000-0002-2724-8888","affiliations":[{"raw_affiliation_string":"Seoul National University, Seoul, Republic of Korea","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Hyeunjoo Kim","orcid":"https://orcid.org/0009-0000-2045-637X"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hyeunjoo Kim","raw_affiliation_strings":["Seoul National University, Seoul, Republic of Korea"],"raw_orcid":"https://orcid.org/0009-0000-2045-637X","affiliations":[{"raw_affiliation_string":"Seoul National University, Seoul, Republic of Korea","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102188916","display_name":"Sanghun Oh","orcid":null},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Sanghun Oh","raw_affiliation_strings":["Seoul National University, Seoul, Republic of Korea"],"raw_orcid":"https://orcid.org/0009-0002-7669-112X","affiliations":[{"raw_affiliation_string":"Seoul National University, Seoul, Republic of Korea","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000625614","display_name":"Myoungjun Chun","orcid":"https://orcid.org/0000-0002-8188-4324"},"institutions":[{"id":"https://openalex.org/I141371507","display_name":"Soongsil University","ror":"https://ror.org/017xnm587","country_code":"KR","type":"education","lineage":["https://openalex.org/I141371507"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Myoungjun Chun","raw_affiliation_strings":["Soongsil University, Seoul, Republic of Korea"],"raw_orcid":"https://orcid.org/0000-0002-8188-4324","affiliations":[{"raw_affiliation_string":"Soongsil University, Seoul, Republic of Korea","institution_ids":["https://openalex.org/I141371507"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025062056","display_name":"Myungsuk Kim","orcid":"https://orcid.org/0000-0002-8667-3198"},"institutions":[{"id":"https://openalex.org/I31419693","display_name":"Kyungpook National University","ror":"https://ror.org/040c17130","country_code":"KR","type":"education","lineage":["https://openalex.org/I31419693"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Myungsuk Kim","raw_affiliation_strings":["Kyungpook National University, Daegu, Republic of Korea"],"raw_orcid":"https://orcid.org/0000-0002-8667-3198","affiliations":[{"raw_affiliation_string":"Kyungpook National University, Daegu, Republic of Korea","institution_ids":["https://openalex.org/I31419693"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5062560097","display_name":"Jihong Kim","orcid":"https://orcid.org/0000-0002-7977-9883"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jihong Kim","raw_affiliation_strings":["Seoul National University, Seoul, Republic of Korea"],"raw_orcid":"https://orcid.org/0000-0002-7977-9883","affiliations":[{"raw_affiliation_string":"Seoul National University, Seoul, Republic of Korea","institution_ids":["https://openalex.org/I139264467"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100369462"],"corresponding_institution_ids":["https://openalex.org/I139264467"],"apc_list":null,"apc_paid":null,"fwci":8.147,"has_fulltext":true,"cited_by_count":7,"citation_normalized_percentile":{"value":0.97405715,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"529","last_page":"543"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9887999892234802,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9632999897003174,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7032032608985901},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6035060882568359},{"id":"https://openalex.org/keywords/flash","display_name":"Flash (photography)","score":0.6028963923454285},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.24548190832138062}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7032032608985901},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6035060882568359},{"id":"https://openalex.org/C2777526259","wikidata":"https://www.wikidata.org/wiki/Q221836","display_name":"Flash (photography)","level":2,"score":0.6028963923454285},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.24548190832138062},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3695053.3731073","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3695053.3731073","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3695053.3731073","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 52nd Annual International Symposium on Computer Architecture","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3695053.3731073","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3695053.3731073","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3695053.3731073","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 52nd Annual International Symposium on Computer Architecture","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G198881990","display_name":null,"funder_award_id":"RS-2024-00414964","funder_id":"https://openalex.org/F4320322120","funder_display_name":"National Research Foundation of Korea"},{"id":"https://openalex.org/G4294718623","display_name":null,"funder_award_id":"RS-2024-00347394","funder_id":"https://openalex.org/F4320335489","funder_display_name":"Institute for Information and Communications Technology Promotion"},{"id":"https://openalex.org/G7298062262","display_name":null,"funder_award_id":"RS-2024-00456287","funder_id":"https://openalex.org/F4320328359","funder_display_name":"Ministry of Science and ICT, South Korea"}],"funders":[{"id":"https://openalex.org/F4320320671","display_name":"National Research Foundation","ror":"https://ror.org/05s0g1g46"},{"id":"https://openalex.org/F4320321292","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542"},{"id":"https://openalex.org/F4320322120","display_name":"National Research Foundation of Korea","ror":"https://ror.org/013aysd81"},{"id":"https://openalex.org/F4320328359","display_name":"Ministry of Science and ICT, South Korea","ror":"https://ror.org/01wpjm123"},{"id":"https://openalex.org/F4320335489","display_name":"Institute for Information and Communications Technology Promotion","ror":"https://ror.org/01g0hqq23"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4411486373.pdf","grobid_xml":"https://content.openalex.org/works/W4411486373.grobid-xml"},"referenced_works_count":54,"referenced_works":["https://openalex.org/W1951464309","https://openalex.org/W1994895020","https://openalex.org/W2004376136","https://openalex.org/W2066792000","https://openalex.org/W2155370145","https://openalex.org/W2294851045","https://openalex.org/W2398201093","https://openalex.org/W2489439822","https://openalex.org/W2558722810","https://openalex.org/W2766040387","https://openalex.org/W2776946878","https://openalex.org/W2805732642","https://openalex.org/W2884496840","https://openalex.org/W2898581147","https://openalex.org/W2907857652","https://openalex.org/W2924600575","https://openalex.org/W2946609015","https://openalex.org/W2952348083","https://openalex.org/W2966983573","https://openalex.org/W2979937837","https://openalex.org/W2985022115","https://openalex.org/W3106304288","https://openalex.org/W3112282291","https://openalex.org/W3127197434","https://openalex.org/W3134048768","https://openalex.org/W3155143326","https://openalex.org/W3155243801","https://openalex.org/W3205140122","https://openalex.org/W4214559301","https://openalex.org/W4281790502","https://openalex.org/W4281850905","https://openalex.org/W4283256902","https://openalex.org/W4285058279","https://openalex.org/W4293023436","https://openalex.org/W4293024107","https://openalex.org/W4294068540","https://openalex.org/W4308083511","https://openalex.org/W4308083518","https://openalex.org/W4308083744","https://openalex.org/W4312704108","https://openalex.org/W4317794296","https://openalex.org/W4318541554","https://openalex.org/W4321488298","https://openalex.org/W4360831773","https://openalex.org/W4360831969","https://openalex.org/W4366341968","https://openalex.org/W4366549000","https://openalex.org/W4366967372","https://openalex.org/W4389519530","https://openalex.org/W4393407046","https://openalex.org/W4393591641","https://openalex.org/W4395106425","https://openalex.org/W4401212085","https://openalex.org/W4402671659"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"While":[0],"large":[1],"language":[2],"models":[3],"(LLMs)":[4],"achieve":[5],"remarkable":[6],"performance":[7,58],"across":[8],"diverse":[9],"application":[10],"domains,":[11],"their":[12],"substantial":[13],"memory":[14,38,186],"demands":[15],"present":[16],"challenges,":[17],"especially":[18],"on":[19,52,112,156],"personal":[20],"devices":[21],"with":[22,145,182],"limited":[23,63,109],"DRAM":[24],"capacity.Recent":[25],"LLM":[26,129],"inference":[27,48,144],"engines":[28],"have":[29],"introduced":[30],"SSD":[31,171],"offloading":[32,172],"for":[33,75,127],"model":[34],"parameters":[35,130],"to":[36,56,61,150,169],"reduce":[37],"footprint.However,":[39],"the":[40,62,97,108],"highly":[41],"memory-bound":[42],"nature":[43],"of":[44,65,101],"on-device":[45,76],"LLMs":[46,159],"makes":[47],"speed":[49],"heavily":[50],"dependent":[51],"read":[53,121,141],"bandwidth,":[54],"leading":[55],"significant":[57],"degradation":[59],"due":[60],"bandwidth":[64,100,142],"SSDs.In":[66],"this":[67,113],"paper,":[68],"we":[69],"propose":[70],"an":[71],"in-flash":[72,92],"processing":[73],"solution":[74],"LLM,":[77],"called":[78],"Accelerator-in-Flash":[79],"(AiF),":[80],"which":[81],"integrates":[82],"matrix-vector":[83],"multiplication":[84],"(GEMV)":[85],"operations":[86],"directly":[87],"into":[88],"flash":[89,102,120,133],"chips.By":[90],"enabling":[91],"GEMV":[93],"operations,":[94],"AiF":[95,116,162,174],"leverages":[96],"high":[98],"internal":[99,140],"chips":[103],"without":[104],"being":[105],"constrained":[106],"by":[107],"external":[110],"bandwidth.Building":[111],"core":[114],"structure,":[115],"employs":[117],"two":[118],"novel":[119],"techniques":[122],"that":[123,161],"were":[124],"specifically":[125],"optimized":[126],"reading":[128],"stored":[131],"in":[132,139],"memory.AiF":[134],"achieves":[135],"a":[136,164,183],"4x":[137],"boost":[138],"during":[143],"minimal":[146],"implementation":[147],"overhead,":[148],"thanks":[149],"its":[151],"streamlined":[152],"error":[153],"correction":[154],"process.Evaluations":[155],"eight":[157],"real-world":[158],"reveal":[160],"provides":[163],"14.6x":[165],"throughput":[166,181],"improvement":[167],"compared":[168],"baseline":[170],"schemes.Furthermore,":[173],"surpasses":[175],"in-memory":[176],"inference,":[177],"delivering":[178],"1.4x":[179],"higher":[180],"significantly":[184],"reduced":[185],"footprint.":[187]},"counts_by_year":[{"year":2026,"cited_by_count":7}],"updated_date":"2026-05-30T09:04:40.226872","created_date":"2025-10-10T00:00:00"}
