{"id":"https://openalex.org/W4411486400","doi":"https://doi.org/10.1145/3695053.3731064","title":"AMALI: An Analytical Model for Accurately Modeling LLM Inference on Modern GPUs","display_name":"AMALI: An Analytical Model for Accurately Modeling LLM Inference on Modern GPUs","publication_year":2025,"publication_date":"2025-06-20","ids":{"openalex":"https://openalex.org/W4411486400","doi":"https://doi.org/10.1145/3695053.3731064"},"language":"en","primary_location":{"id":"doi:10.1145/3695053.3731064","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3695053.3731064","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3695053.3731064","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 52nd Annual International Symposium on Computer Architecture","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3695053.3731064","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Shiheng Cao","orcid":"https://orcid.org/0009-0002-9705-7216"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shiheng Cao","raw_affiliation_strings":["University of Science and Technology of China, Hefei, China"],"raw_orcid":"https://orcid.org/0009-0002-9705-7216","affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077675506","display_name":"Junmin Wu","orcid":"https://orcid.org/0009-0001-3136-6721"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]},{"id":"https://openalex.org/I308837","display_name":"Suzhou University of Science and Technology","ror":"https://ror.org/04en8wb91","country_code":"CN","type":"education","lineage":["https://openalex.org/I308837"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junmin Wu","raw_affiliation_strings":["University of Science and Technology of China, Hefei, China and Suzhou Institute for Advanced Research, University of Science and Technology of China, Suzhou, China"],"raw_orcid":"https://orcid.org/0009-0001-3136-6721","affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China and Suzhou Institute for Advanced Research, University of Science and Technology of China, Suzhou, China","institution_ids":["https://openalex.org/I308837","https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101436158","display_name":"Junshi Chen","orcid":"https://orcid.org/0000-0002-6487-3658"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junshi Chen","raw_affiliation_strings":["University of Science and Technology of China, Hefei, China"],"raw_orcid":"https://orcid.org/0000-0002-6487-3658","affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085758579","display_name":"Hong An","orcid":"https://orcid.org/0000-0002-3900-3722"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hong An","raw_affiliation_strings":["University of Science and Technology of China, Hefei, China"],"raw_orcid":"https://orcid.org/0000-0002-3900-3722","affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5048614443","display_name":"Zhibin Yu","orcid":"https://orcid.org/0000-0001-8067-9612"},"institutions":[{"id":"https://openalex.org/I4210145761","display_name":"Shenzhen Institutes of Advanced Technology","ror":"https://ror.org/04gh4er46","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210145761"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhibin Yu","raw_affiliation_strings":["Shenzhen Institutes of Advanced Technology(SIAT), Chinese Academy of Science(CAS), Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0001-8067-9612","affiliations":[{"raw_affiliation_string":"Shenzhen Institutes of Advanced Technology(SIAT), Chinese Academy of Science(CAS), Shenzhen, China","institution_ids":["https://openalex.org/I4210145761"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":4.3093,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.93439589,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1495","last_page":"1508"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10522","display_name":"Medical Imaging Techniques and Applications","score":0.9904000163078308,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9894999861717224,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6235376000404358},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6126822233200073},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.3499446213245392},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.20033147931098938}],"concepts":[{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6235376000404358},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6126822233200073},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.3499446213245392},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.20033147931098938}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3695053.3731064","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3695053.3731064","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3695053.3731064","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 52nd Annual International Symposium on Computer Architecture","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3695053.3731064","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3695053.3731064","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3695053.3731064","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 52nd Annual International Symposium on Computer Architecture","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4411486400.pdf","grobid_xml":"https://content.openalex.org/works/W4411486400.grobid-xml"},"referenced_works_count":43,"referenced_works":["https://openalex.org/W1979527452","https://openalex.org/W2016939324","https://openalex.org/W2033597569","https://openalex.org/W2038666141","https://openalex.org/W2047060659","https://openalex.org/W2080592089","https://openalex.org/W2093043622","https://openalex.org/W2103742924","https://openalex.org/W2106562406","https://openalex.org/W2113282196","https://openalex.org/W2130336316","https://openalex.org/W2134860672","https://openalex.org/W2142769604","https://openalex.org/W2144264070","https://openalex.org/W2163687928","https://openalex.org/W2167334577","https://openalex.org/W2239144794","https://openalex.org/W2409690919","https://openalex.org/W2417175077","https://openalex.org/W2736244279","https://openalex.org/W2921788688","https://openalex.org/W2930604630","https://openalex.org/W2950480563","https://openalex.org/W2953033921","https://openalex.org/W2953264125","https://openalex.org/W2979340153","https://openalex.org/W2984189761","https://openalex.org/W2991330024","https://openalex.org/W2997701623","https://openalex.org/W3102510044","https://openalex.org/W3104094521","https://openalex.org/W3105696640","https://openalex.org/W3130554079","https://openalex.org/W3157055696","https://openalex.org/W3210604792","https://openalex.org/W4237229330","https://openalex.org/W4243970730","https://openalex.org/W4281689389","https://openalex.org/W4308090436","https://openalex.org/W4311457721","https://openalex.org/W4393592128","https://openalex.org/W4401211642","https://openalex.org/W4401211811"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Large":[0],"language":[1],"model":[2,22,85,90,109,158],"(LLM)":[3],"inference":[4,48,75,92,168,181],"applications":[5,49],"are":[6],"surging":[7],"in":[8],"recent":[9],"years,":[10],"which":[11],"largely":[12],"relies":[13],"on":[14,50,93,173],"modern":[15,51,94,124],"GPUs.On":[16],"the":[17,113,120,188],"other":[18],"hand,":[19],"GPU":[20,39,152,176],"analytical":[21,40,84,128],"is":[23],"a":[24,82,156],"commonly":[25],"used":[26],"tool":[27],"for":[28,73,123,130],"architects":[29],"to":[30,88,118,139,150,165,196],"precisely":[31],"identify":[32],"bottlenecks":[33],"quickly":[34],"with":[35,96],"deep":[36],"insights.However,":[37],"existing":[38],"models":[41,129],"fall":[42],"short":[43],"of":[44,54],"accurately":[45,89,111],"modeling":[46,67,122],"LLM":[47,74,91,167,180],"GPUs,":[52],"because":[53],"unsuitable":[55],"tensor":[56,107],"core":[57,108],"modeling,":[58],"ignoring":[59],"constant":[60,131],"cache":[61,66,132,135],"as":[62,64],"well":[63],"instruction":[65,102,134,162],"and":[68,104,133],"abstracting":[69],"away":[70],"important":[71],"details":[72],"applications.To":[76],"address":[77],"this":[78],"problem,":[79],"we":[80,99,126,154],"propose":[81,127],"novel":[83],"dubbed":[86],"AMALI":[87,172,186],"GPUs":[95],"three":[97],"innovations.First,":[98],"develop":[100],"an":[101,174],"modifier":[103],"throughput":[105],"based":[106],"by":[110,136,159,177],"capturing":[112],"math":[114],"pipe":[115],"throttle":[116],"stalls":[117],"enhance":[119],"architecture":[121],"GPUs.Second,":[125],"developing":[137],"micro-benchmarks":[138],"measure":[140],"CUDA":[141],"kernel":[142],"launching":[143],"latencies.This":[144],"significantly":[145],"improves":[146],"AMALI's":[147],"accuracy":[148],"compared":[149],"real":[151],"hardware.Finally,":[153],"design":[155],"multi-warp":[157],"leveraging":[160],"warp":[161],"number":[163],"distribution":[164],"reflect":[166],"application":[169],"characteristics.We":[170],"validate":[171],"A100":[175],"using":[178],"typical":[179],"applications.The":[182],"results":[183],"show":[184],"that":[185],"reduces":[187],"MAPE":[189],"(mean":[190],"absolute":[191],"percentage":[192],"error)":[193],"from":[194],"127.56%":[195],"23.":[197]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
