{"id":"https://openalex.org/W4403337153","doi":"https://doi.org/10.1145/3656019.3676949","title":"Improving Throughput-oriented LLM Inference with CPU Computations","display_name":"Improving Throughput-oriented LLM Inference with CPU Computations","publication_year":2024,"publication_date":"2024-10-11","ids":{"openalex":"https://openalex.org/W4403337153","doi":"https://doi.org/10.1145/3656019.3676949"},"language":"en","primary_location":{"id":"doi:10.1145/3656019.3676949","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3656019.3676949","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Parallel Architectures and Compilation Techniques","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3656019.3676949","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5051797786","display_name":"Daon Park","orcid":"https://orcid.org/0000-0003-2312-3049"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Daon Park","raw_affiliation_strings":["Seoul National University, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Seoul National University, Republic of Korea","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044493612","display_name":"Bernhard Egger","orcid":"https://orcid.org/0000-0002-6645-6161"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Bernhard Egger","raw_affiliation_strings":["Seoul National University, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Seoul National University, Republic of Korea","institution_ids":["https://openalex.org/I139264467"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5051797786"],"corresponding_institution_ids":["https://openalex.org/I139264467"],"apc_list":null,"apc_paid":null,"fwci":2.7471,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.91123085,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"233","last_page":"245"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9854000210762024,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9854000210762024,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9728000164031982,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9657999873161316,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7441384792327881},{"id":"https://openalex.org/keywords/throughput","display_name":"Throughput","score":0.7349357008934021},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6130266189575195},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.6087421774864197},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5773899555206299},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.36640554666519165},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.34505850076675415},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3340378403663635},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.22187450528144836},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.16731908917427063},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.13439670205116272}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7441384792327881},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.7349357008934021},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6130266189575195},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.6087421774864197},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5773899555206299},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.36640554666519165},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.34505850076675415},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3340378403663635},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.22187450528144836},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.16731908917427063},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.13439670205116272},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3656019.3676949","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3656019.3676949","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Parallel Architectures and Compilation Techniques","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3656019.3676949","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3656019.3676949","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2024 International Conference on Parallel Architectures and Compilation Techniques","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7","score":0.699999988079071}],"awards":[{"id":"https://openalex.org/G4187202762","display_name":null,"funder_award_id":"2023-00228255","funder_id":"https://openalex.org/F4320323817","funder_display_name":"Universitas Brawijaya"},{"id":"https://openalex.org/G6849789567","display_name":null,"funder_award_id":"21A20151113068","funder_id":"https://openalex.org/F4320323817","funder_display_name":"Universitas Brawijaya"}],"funders":[{"id":"https://openalex.org/F4320323817","display_name":"Universitas Brawijaya","ror":"https://ror.org/01wk3d929"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W2896838216","https://openalex.org/W2964110616","https://openalex.org/W2984100107","https://openalex.org/W2998183051","https://openalex.org/W3131724164","https://openalex.org/W3133458480","https://openalex.org/W3165031368","https://openalex.org/W3185341429","https://openalex.org/W4220738659","https://openalex.org/W4321636575","https://openalex.org/W4382319938","https://openalex.org/W4385438314","https://openalex.org/W4387321091","https://openalex.org/W4389518760","https://openalex.org/W6778883912"],"related_works":["https://openalex.org/W2136583354","https://openalex.org/W2111238207","https://openalex.org/W2055243143","https://openalex.org/W2760721665","https://openalex.org/W2107954672","https://openalex.org/W330130819","https://openalex.org/W2288610023","https://openalex.org/W3001594407","https://openalex.org/W2112044895","https://openalex.org/W3121416282"],"abstract_inverted_index":{"Large":[0],"language":[1,145],"models":[2,123],"(LLMs)":[3],"have":[4],"recently":[5],"captured":[6],"the":[7,31,39,65,73,86,92,134,137,154,156,171],"attention":[8],"of":[9,30,41,75,102,164],"a":[10,14,47,99,110,159],"broad":[11],"audience.":[12],"To":[13],"large":[15,144],"part,":[16],"their":[17],"exceptional":[18],"performance":[19],"in":[20,168],"text":[21],"generation":[22],"was":[23],"made":[24],"possible":[25],"by":[26,63,71,91],"an":[27,162],"exponential":[28],"growth":[29],"model":[32],"parameters.":[33],"This":[34,118],"growth,":[35],"however,":[36,79],"comes":[37],"at":[38],"expense":[40],"significantly":[42],"higher":[43],"operational":[44],"costs":[45],"and":[46,113,136],"decreased":[48],"processing":[49],"speed.":[50],"Recent":[51],"research":[52],"has":[53],"focused":[54],"on":[55,58,124],"running":[56],"LLMs":[57],"commodity":[59],"hardware,":[60,127],"for":[61,132,161],"example,":[62],"employing":[64],"memory":[66],"hierarchy":[67],"to":[68,81,121,153,166],"augment":[69],"throughput":[70,169],"increasing":[72],"number":[74],"batches.":[76],"These":[77],"studies,":[78],"tend":[80],"overlook":[82],"or":[83],"inefficiently":[84],"utilize":[85],"additional":[87],"computational":[88,107],"resources":[89,108],"provided":[90],"CPU.":[93],"In":[94],"this":[95],"paper,":[96],"we":[97],"introduce":[98],"technique":[100,119],"capable":[101],"efficiently":[103],"harnessing":[104],"all":[105],"available":[106],"through":[109],"finely":[111],"tuned":[112],"dynamic":[114],"workload":[115],"allocation":[116],"approach.":[117],"applies":[120],"decoder-based":[122,150],"standard":[125],"general-purpose":[126],"effectively":[128],"minimizing":[129],"idle":[130],"periods":[131],"both":[133],"CPU":[135],"GPU.":[138],"We":[139],"conducted":[140],"experiments":[141],"involving":[142],"various":[143],"models,":[146],"each":[147],"representing":[148],"distinct":[149],"architectures.":[151],"Compared":[152],"state-of-the-art,":[155],"results":[157],"demonstrate":[158],"potential":[160],"increase":[163],"up":[165],"105%":[167],"with":[170],"OPT-30B":[172],"model.":[173]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":6}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
