{"id":"https://openalex.org/W4385834084","doi":"https://doi.org/10.1109/lca.2023.3305386","title":"Unleashing the Potential of PIM: Accelerating Large Batched Inference of Transformer-Based Generative Models","display_name":"Unleashing the Potential of PIM: Accelerating Large Batched Inference of Transformer-Based Generative Models","publication_year":2023,"publication_date":"2023-07-01","ids":{"openalex":"https://openalex.org/W4385834084","doi":"https://doi.org/10.1109/lca.2023.3305386"},"language":"en","primary_location":{"id":"doi:10.1109/lca.2023.3305386","is_oa":true,"landing_page_url":"https://doi.org/10.1109/lca.2023.3305386","pdf_url":"https://ieeexplore.ieee.org/ielx7/10208/4357966/10218731.pdf","source":{"id":"https://openalex.org/S17643076","display_name":"IEEE Computer Architecture Letters","issn_l":"1556-6056","issn":["1556-6056","1556-6064","2473-2575"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Computer Architecture Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://ieeexplore.ieee.org/ielx7/10208/4357966/10218731.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032839104","display_name":"Jaewan Choi","orcid":"https://orcid.org/0000-0003-2447-4369"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Jaewan Choi","raw_affiliation_strings":["Seoul National University, Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"Seoul National University, Seoul, South Korea","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100618952","display_name":"Jaehyun Park","orcid":"https://orcid.org/0000-0001-5623-6985"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jaehyun Park","raw_affiliation_strings":["Seoul National University, Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"Seoul National University, Seoul, South Korea","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002219153","display_name":"Kwanhee Kyung","orcid":"https://orcid.org/0000-0003-4243-2111"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Kwanhee Kyung","raw_affiliation_strings":["Seoul National University, Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"Seoul National University, Seoul, South Korea","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037648751","display_name":"Nam Sung Kim","orcid":"https://orcid.org/0000-0002-0442-5634"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nam Sung Kim","raw_affiliation_strings":["University of Illinois at Urbana-Champaign, Champaign, IL, USA"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana-Champaign, Champaign, IL, USA","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5078262826","display_name":"Jung Ho Ahn","orcid":"https://orcid.org/0000-0003-1733-1394"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jung Ho Ahn","raw_affiliation_strings":["Seoul National University, Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"Seoul National University, Seoul, South Korea","institution_ids":["https://openalex.org/I139264467"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5032839104"],"corresponding_institution_ids":["https://openalex.org/I139264467"],"apc_list":null,"apc_paid":null,"fwci":4.2252,"has_fulltext":true,"cited_by_count":14,"citation_normalized_percentile":{"value":0.95047242,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":"22","issue":"2","first_page":"113","last_page":"116"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7841237783432007},{"id":"https://openalex.org/keywords/byte","display_name":"Byte","score":0.5158149600028992},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5135793685913086},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.5111854672431946},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5017287731170654},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.49411657452583313},{"id":"https://openalex.org/keywords/dram","display_name":"Dram","score":0.425618052482605},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.41275736689567566},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.38782936334609985},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3536420464515686},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.3503304421901703},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.2506718039512634},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.21050721406936646},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.15711647272109985}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7841237783432007},{"id":"https://openalex.org/C43364308","wikidata":"https://www.wikidata.org/wiki/Q8799","display_name":"Byte","level":2,"score":0.5158149600028992},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5135793685913086},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.5111854672431946},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5017287731170654},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.49411657452583313},{"id":"https://openalex.org/C7366592","wikidata":"https://www.wikidata.org/wiki/Q1255620","display_name":"Dram","level":2,"score":0.425618052482605},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.41275736689567566},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.38782936334609985},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3536420464515686},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3503304421901703},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.2506718039512634},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.21050721406936646},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.15711647272109985},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lca.2023.3305386","is_oa":true,"landing_page_url":"https://doi.org/10.1109/lca.2023.3305386","pdf_url":"https://ieeexplore.ieee.org/ielx7/10208/4357966/10218731.pdf","source":{"id":"https://openalex.org/S17643076","display_name":"IEEE Computer Architecture Letters","issn_l":"1556-6056","issn":["1556-6056","1556-6064","2473-2575"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Computer Architecture Letters","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1109/lca.2023.3305386","is_oa":true,"landing_page_url":"https://doi.org/10.1109/lca.2023.3305386","pdf_url":"https://ieeexplore.ieee.org/ielx7/10208/4357966/10218731.pdf","source":{"id":"https://openalex.org/S17643076","display_name":"IEEE Computer Architecture Letters","issn_l":"1556-6056","issn":["1556-6056","1556-6064","2473-2575"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Computer Architecture Letters","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","score":0.9100000262260437,"display_name":"Affordable and clean energy"}],"awards":[{"id":"https://openalex.org/G5004571498","display_name":null,"funder_award_id":"2021-0-00863","funder_id":"https://openalex.org/F4320328359","funder_display_name":"Ministry of Science and ICT, South Korea"},{"id":"https://openalex.org/G6072120315","display_name":null,"funder_award_id":"funded","funder_id":"https://openalex.org/F4320335489","funder_display_name":"Institute for Information and Communications Technology Promotion"},{"id":"https://openalex.org/G7427225465","display_name":null,"funder_award_id":"2021-0-00863","funder_id":"https://openalex.org/F4320335489","funder_display_name":"Institute for Information and Communications Technology Promotion"},{"id":"https://openalex.org/G8732482030","display_name":null,"funder_award_id":"SRC program","funder_id":"https://openalex.org/F4320332180","funder_display_name":"Defense Advanced Research Projects Agency"}],"funders":[{"id":"https://openalex.org/F4320328359","display_name":"Ministry of Science and ICT, South Korea","ror":"https://ror.org/01wpjm123"},{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"},{"id":"https://openalex.org/F4320332195","display_name":"Samsung","ror":"https://ror.org/04w3jy968"},{"id":"https://openalex.org/F4320335489","display_name":"Institute for Information and Communications Technology Promotion","ror":"https://ror.org/01g0hqq23"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4385834084.pdf","grobid_xml":"https://content.openalex.org/works/W4385834084.grobid-xml"},"referenced_works_count":14,"referenced_works":["https://openalex.org/W2761132374","https://openalex.org/W2973727699","https://openalex.org/W3189166979","https://openalex.org/W3207399097","https://openalex.org/W4280496502","https://openalex.org/W4287704453","https://openalex.org/W4292779060","https://openalex.org/W4308083513","https://openalex.org/W4385245566","https://openalex.org/W6739901393","https://openalex.org/W6767997687","https://openalex.org/W6778883912","https://openalex.org/W6781533629","https://openalex.org/W6842393504"],"related_works":["https://openalex.org/W2950304420","https://openalex.org/W3164092048","https://openalex.org/W4306707339","https://openalex.org/W4385573721","https://openalex.org/W3039805635","https://openalex.org/W1764185321","https://openalex.org/W2948197522","https://openalex.org/W3146091044","https://openalex.org/W4321392417","https://openalex.org/W4287241953"],"abstract_inverted_index":{"Transformer-based":[0],"generative":[1],"models,":[2],"such":[3],"as":[4],"GPT,":[5],"summarize":[6],"an":[7,182,261,268],"input":[8,34,262],"sequence":[9,22,80,172,263,270],"by":[10,23,176,249],"generating":[11],"key/value":[12],"(KV)":[13],"matrices":[14,26,86,135,158,225],"through":[15,87],"attention":[16,104],"and":[17,35,48,128,233,240,251,267],"generate":[18],"the":[19,31,44,61,68,76,92,125,129,133,141,153,156,170,177,216,220,223,227,238,255],"corresponding":[20,180],"output":[21,36,171,183,269],"utilizing":[24],"these":[25,65],"once":[27,161],"per":[28,120],"token":[29],"of":[30,46,79,124,132,140,187,196,222,246,265,272],"sequence.":[32],"Both":[33],"sequences":[37],"tend":[38],"to":[39,59,71,148,169,181],"get":[40],"longer,":[41],"which":[42,151],"improves":[43,237],"understanding":[45],"contexts":[47],"conversation":[49],"quality.":[50],"These":[51],"models":[52],"are":[53,98,159],"also":[54],"typically":[55],"batched":[56],"for":[57,106,205,243],"inference":[58,107],"improve":[60],"serving":[62,247],"throughput.":[63],"All":[64],"trends":[66],"enable":[67],"models'":[69],"weights":[70],"be":[72,192,202],"reused":[73],"effectively,":[74],"increasing":[75],"relative":[77],"importance":[78],"generation,":[81],"especially":[82],"in":[83],"processing":[84],"KV":[85,113,134,157,224],"attention.":[88,206],"We":[89,207],"identify":[90],"that":[91,139,155],"conventional":[93],"computing":[94],"platforms":[95],"(e.g.,":[96],"GPUs)":[97],"not":[99],"efficient":[100],"at":[101],"handling":[102],"this":[103],"part":[105],"because":[108],"each":[109,174,214],"request":[110],"generates":[111],"different":[112],"matrices,":[114],"it":[115],"has":[116],"a":[117],"low":[118],"operation":[119],"byte":[121],"ratio":[122],"regardless":[123],"batch":[126],"size,":[127],"aggregate":[130],"size":[131],"can":[136],"even":[137],"surpass":[138],"entire":[142],"model":[143],"weights.":[144],"This":[145],"motivates":[146],"us":[147],"propose":[149],"AttAcc,":[150],"exploits":[152],"fact":[154],"written":[160],"during":[162],"summarization":[163],"but":[164],"used":[165],"many":[166],"times":[167,253],"(proportional":[168],"length),":[173],"multiplied":[175],"embedding":[178,217],"vector":[179,218],"token.":[184],"The":[185],"volume":[186],"data":[188],"entering/leaving":[189],"AttAcc":[190,209,236],"could":[191],"more":[193],"than":[194,199],"orders":[195],"magnitude":[197],"smaller":[198],"what":[200],"should":[201],"read":[203],"internally":[204],"design":[208],"with":[210,219,260],"multiple":[211],"processing-in-memory":[212],"devices,":[213,228],"multiplying":[215],"portion":[221],"within":[226],"saving":[229],"external":[230],"(inter-device)":[231],"bandwidth":[232],"energy":[234,241],"consumption.":[235],"performance":[239],"efficiency":[242],"DRAM":[244],"access":[245],"GPT-3":[248],"3.19":[250],"3.22":[252],"over":[254],"DGX":[256],"A100":[257],"640":[258],"GB":[259],"length":[264,271],"2048":[266],"128.":[273]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":7}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
