{"id":"https://openalex.org/W4409248659","doi":"https://doi.org/10.1109/hpca61900.2025.00126","title":"PAISE: PIM-Accelerated Inference Scheduling Engine for Transformer-based LLM","display_name":"PAISE: PIM-Accelerated Inference Scheduling Engine for Transformer-based LLM","publication_year":2025,"publication_date":"2025-03-01","ids":{"openalex":"https://openalex.org/W4409248659","doi":"https://doi.org/10.1109/hpca61900.2025.00126"},"language":"en","primary_location":{"id":"doi:10.1109/hpca61900.2025.00126","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca61900.2025.00126","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041250916","display_name":"Hyojung Lee","orcid":"https://orcid.org/0000-0002-0471-6650"},"institutions":[{"id":"https://openalex.org/I2250650973","display_name":"Samsung (South Korea)","ror":"https://ror.org/04w3jy968","country_code":"KR","type":"company","lineage":["https://openalex.org/I2250650973"]},{"id":"https://openalex.org/I4387155180","display_name":"Samsung SDS (South Korea)","ror":"https://ror.org/0476bn305","country_code":null,"type":"company","lineage":["https://openalex.org/I2250650973","https://openalex.org/I4387155180"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Hyojung Lee","raw_affiliation_strings":["Cloud Research Team, Samsung SDS"],"affiliations":[{"raw_affiliation_string":"Cloud Research Team, Samsung SDS","institution_ids":["https://openalex.org/I2250650973","https://openalex.org/I4387155180"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028282859","display_name":"Daehyeon Baek","orcid":"https://orcid.org/0009-0008-0460-3809"},"institutions":[{"id":"https://openalex.org/I2250650973","display_name":"Samsung (South Korea)","ror":"https://ror.org/04w3jy968","country_code":"KR","type":"company","lineage":["https://openalex.org/I2250650973"]},{"id":"https://openalex.org/I4387155180","display_name":"Samsung SDS (South Korea)","ror":"https://ror.org/0476bn305","country_code":null,"type":"company","lineage":["https://openalex.org/I2250650973","https://openalex.org/I4387155180"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Daehyeon Baek","raw_affiliation_strings":["Cloud Research Team, Samsung SDS"],"affiliations":[{"raw_affiliation_string":"Cloud Research Team, Samsung SDS","institution_ids":["https://openalex.org/I2250650973","https://openalex.org/I4387155180"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jimyoung Son","orcid":null},"institutions":[{"id":"https://openalex.org/I2250650973","display_name":"Samsung (South Korea)","ror":"https://ror.org/04w3jy968","country_code":"KR","type":"company","lineage":["https://openalex.org/I2250650973"]},{"id":"https://openalex.org/I4387155180","display_name":"Samsung SDS (South Korea)","ror":"https://ror.org/0476bn305","country_code":null,"type":"company","lineage":["https://openalex.org/I2250650973","https://openalex.org/I4387155180"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jimyoung Son","raw_affiliation_strings":["Cloud Research Team, Samsung SDS"],"affiliations":[{"raw_affiliation_string":"Cloud Research Team, Samsung SDS","institution_ids":["https://openalex.org/I2250650973","https://openalex.org/I4387155180"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024917453","display_name":"Jieun Choi","orcid":"https://orcid.org/0000-0002-4259-6231"},"institutions":[{"id":"https://openalex.org/I2250650973","display_name":"Samsung (South Korea)","ror":"https://ror.org/04w3jy968","country_code":"KR","type":"company","lineage":["https://openalex.org/I2250650973"]},{"id":"https://openalex.org/I4387155180","display_name":"Samsung SDS (South Korea)","ror":"https://ror.org/0476bn305","country_code":null,"type":"company","lineage":["https://openalex.org/I2250650973","https://openalex.org/I4387155180"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jieun Choi","raw_affiliation_strings":["Cloud Research Team, Samsung SDS"],"affiliations":[{"raw_affiliation_string":"Cloud Research Team, Samsung SDS","institution_ids":["https://openalex.org/I2250650973","https://openalex.org/I4387155180"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Kihyo Moon","orcid":null},"institutions":[{"id":"https://openalex.org/I2250650973","display_name":"Samsung (South Korea)","ror":"https://ror.org/04w3jy968","country_code":"KR","type":"company","lineage":["https://openalex.org/I2250650973"]},{"id":"https://openalex.org/I4387155180","display_name":"Samsung SDS (South Korea)","ror":"https://ror.org/0476bn305","country_code":null,"type":"company","lineage":["https://openalex.org/I2250650973","https://openalex.org/I4387155180"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Kihyo Moon","raw_affiliation_strings":["Cloud Research Team, Samsung SDS"],"affiliations":[{"raw_affiliation_string":"Cloud Research Team, Samsung SDS","institution_ids":["https://openalex.org/I2250650973","https://openalex.org/I4387155180"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5110984370","display_name":"Minsung Jang","orcid":null},"institutions":[{"id":"https://openalex.org/I2250650973","display_name":"Samsung (South Korea)","ror":"https://ror.org/04w3jy968","country_code":"KR","type":"company","lineage":["https://openalex.org/I2250650973"]},{"id":"https://openalex.org/I4387155180","display_name":"Samsung SDS (South Korea)","ror":"https://ror.org/0476bn305","country_code":null,"type":"company","lineage":["https://openalex.org/I2250650973","https://openalex.org/I4387155180"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Minsung Jang","raw_affiliation_strings":["Cloud Research Team, Samsung SDS"],"affiliations":[{"raw_affiliation_string":"Cloud Research Team, Samsung SDS","institution_ids":["https://openalex.org/I2250650973","https://openalex.org/I4387155180"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5041250916"],"corresponding_institution_ids":["https://openalex.org/I2250650973","https://openalex.org/I4387155180"],"apc_list":null,"apc_paid":null,"fwci":3.3489,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.91384226,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1707","last_page":"1719"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10972","display_name":"Power Systems Fault Detection","score":0.9902999997138977,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10972","display_name":"Power Systems Fault Detection","score":0.9902999997138977,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11343","display_name":"Power Transformer Diagnostics and Insulation","score":0.9886999726295471,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11222","display_name":"Magnetic Properties and Applications","score":0.9750000238418579,"subfield":{"id":"https://openalex.org/subfields/2504","display_name":"Electronic, Optical and Magnetic Materials"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6783826947212219},{"id":"https://openalex.org/keywords/inference-engine","display_name":"Inference engine","score":0.6147149801254272},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5918524861335754},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5576239228248596},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2658672630786896},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.1676855981349945},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.13683679699897766},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.08293044567108154}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6783826947212219},{"id":"https://openalex.org/C46743427","wikidata":"https://www.wikidata.org/wiki/Q1341685","display_name":"Inference engine","level":3,"score":0.6147149801254272},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5918524861335754},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5576239228248596},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2658672630786896},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.1676855981349945},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.13683679699897766},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.08293044567108154}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpca61900.2025.00126","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca61900.2025.00126","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Symposium on High Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy","score":0.7300000190734863}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W3006586535","https://openalex.org/W3081168214","https://openalex.org/W3100710793","https://openalex.org/W3189166979","https://openalex.org/W3206328251","https://openalex.org/W3210580311","https://openalex.org/W4221001402","https://openalex.org/W4226126604","https://openalex.org/W4232168013","https://openalex.org/W4280496502","https://openalex.org/W4297097348","https://openalex.org/W4311457721","https://openalex.org/W4387321091","https://openalex.org/W4388229519","https://openalex.org/W4389476267","https://openalex.org/W4390041933","https://openalex.org/W4390678101","https://openalex.org/W4392427708","https://openalex.org/W4393407316","https://openalex.org/W4394998968","https://openalex.org/W6838322825","https://openalex.org/W6854866820","https://openalex.org/W6856387142"],"related_works":["https://openalex.org/W2057057690","https://openalex.org/W2368184788","https://openalex.org/W2358964818","https://openalex.org/W2359535128","https://openalex.org/W2381332051","https://openalex.org/W4206178588","https://openalex.org/W3094491777","https://openalex.org/W3214715529","https://openalex.org/W4287635093","https://openalex.org/W2321443665"],"abstract_inverted_index":{"Transformer-based":[0],"Large":[1],"Language":[2],"Models":[3],"(LLMs)":[4],"demand":[5],"significant":[6,174],"computational":[7],"and":[8,108,112,140,188],"memory":[9,34,54],"resources":[10,81],"due":[11],"to":[12,40,57,72,82,100,102,159,166,169,176,186],"the":[13,22,41,138,156,178],"autoregressive":[14],"token":[15],"generation":[16],"in":[17,25,86],"decoder":[18,46],"blocks.":[19],"In":[20],"particular,":[21],"attention":[23,157],"layer":[24,158],"LLM":[26,51,181],"models":[27,142],"has":[28],"low":[29],"arithmetic":[30],"intensity":[31],"but":[32],"high":[33],"traffic,":[35],"thus":[36],"requiring":[37],"frequent":[38],"updates":[39],"KV":[42],"matrices":[43],"with":[44,148],"each":[45],"iteration.":[47],"As":[48],"a":[49,66,93],"result,":[50],"inference":[52,84],"becomes":[53],"bound,":[55],"leading":[56],"increased":[58],"latency.":[59],"To":[60],"address":[61],"this,":[62],"we":[63],"introduce":[64],"PAISE,":[65],"framework":[67,90],"leveraging":[68],"Processing-In-Memory":[69],"(PIM)":[70],"technology":[71],"offload":[73,101],"memory-intensive":[74],"tasks.":[75],"PAISE":[76,136],"employs":[77],"GPU-PIM":[78],"heterogeneous":[79],"computing":[80],"optimize":[83],"operations":[85,99],"transformer-based":[87],"LLMs.":[88],"The":[89],"comprises":[91],"(i)":[92],"scheduling":[94],"algorithm":[95],"that":[96,118,154],"decides":[97],"which":[98,183],"PIM":[103,109,116,160],"based":[104],"on":[105,137],"model":[106],"configuration":[107],"hardware":[110],"specifications":[111],"(ii)":[113],"an":[114,144],"enhanced":[115],"kernel":[117],"performs":[119],"transaction-wise":[120],"interleave-batched":[121],"GEMM":[122],"(General":[123],"Matrix":[124],"Multiplication)":[125],"operations,":[126],"maximizing":[127],"data":[128,131],"throughput":[129],"via":[130],"layout":[132],"adjustments.":[133],"We":[134],"implemented":[135],"GPT-2":[139],"Llama2-7B":[141],"using":[143],"AMD":[145],"MI100":[146],"GPU":[147],"HBM-PIM":[149],"devices.":[150],"Our":[151],"evaluations":[152],"show":[153],"offloading":[155],"reduces":[161],"execution":[162],"time":[163],"by":[164],"up":[165],"48.3%":[167],"compared":[168],"GPU-only":[170],"inference,":[171,182],"demonstrating":[172],"PAISE\u2019s":[173],"potential":[175],"enhance":[177],"efficiency":[179],"of":[180],"could":[184],"lead":[185],"faster":[187],"more":[189],"efficient":[190],"AI":[191],"applications.":[192]},"counts_by_year":[{"year":2026,"cited_by_count":3}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
