{"id":"https://openalex.org/W4409282707","doi":"https://doi.org/10.1145/3676536.3676753","title":"An Agile Framework for Efficient LLM Accelerator Development and Model Inference","display_name":"An Agile Framework for Efficient LLM Accelerator Development and Model Inference","publication_year":2024,"publication_date":"2024-10-27","ids":{"openalex":"https://openalex.org/W4409282707","doi":"https://doi.org/10.1145/3676536.3676753"},"language":"en","primary_location":{"id":"doi:10.1145/3676536.3676753","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3676536.3676753","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3676536.3676753","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 43rd IEEE/ACM International Conference on Computer-Aided Design","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3676536.3676753","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5033151335","display_name":"Lvcheng Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lvcheng Chen","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0003-2480-6191","affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069977898","display_name":"Ying Wu","orcid":"https://orcid.org/0000-0002-4898-9343"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ying Wu","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-4898-9343","affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064569231","display_name":"Chenyi Wen","orcid":"https://orcid.org/0000-0002-6508-2639"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chenyi Wen","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-6508-2639","affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109775646","display_name":"Shizhang Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I74525822","display_name":"Hubei University of Technology","ror":"https://ror.org/02d3fj342","country_code":"CN","type":"education","lineage":["https://openalex.org/I74525822"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shizhang Wang","raw_affiliation_strings":["Hubei University of Technology, Wuhan, China"],"raw_orcid":"https://orcid.org/0009-0005-5853-4931","affiliations":[{"raw_affiliation_string":"Hubei University of Technology, Wuhan, China","institution_ids":["https://openalex.org/I74525822"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100425657","display_name":"Li Zhang","orcid":"https://orcid.org/0000-0002-8951-4969"},"institutions":[{"id":"https://openalex.org/I74525822","display_name":"Hubei University of Technology","ror":"https://ror.org/02d3fj342","country_code":"CN","type":"education","lineage":["https://openalex.org/I74525822"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Li Zhang","raw_affiliation_strings":["Hubei University of Technology, Wuhan, China"],"raw_orcid":"https://orcid.org/0000-0002-8951-4969","affiliations":[{"raw_affiliation_string":"Hubei University of Technology, Wuhan, China","institution_ids":["https://openalex.org/I74525822"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051340429","display_name":"Bei Yu","orcid":"https://orcid.org/0000-0001-6406-4810"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"HK","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Bei Yu","raw_affiliation_strings":["The Chinese University of Hong Kong, Hong Kong, Hong Kong"],"raw_orcid":"https://orcid.org/0000-0001-6406-4810","affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100427492","display_name":"Qi Sun","orcid":"https://orcid.org/0000-0001-5153-6698"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qi Sun","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0001-5153-6698","affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5054211420","display_name":"Cheng Zhuo","orcid":"https://orcid.org/0000-0002-2610-7522"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Cheng Zhuo","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-2610-7522","affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":3.0784,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.93886486,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"9"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11044","display_name":"Particle Detector Development and Performance","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/3106","display_name":"Nuclear and High Energy Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11044","display_name":"Particle Detector Development and Performance","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/3106","display_name":"Nuclear and High Energy Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10346","display_name":"Magnetic confinement fusion research","score":0.992900013923645,"subfield":{"id":"https://openalex.org/subfields/3106","display_name":"Nuclear and High Energy Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9901000261306763,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/agile-software-development","display_name":"Agile software development","score":0.8168496489524841},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.730553150177002},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5586487054824829},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.4702640473842621},{"id":"https://openalex.org/keywords/model-driven-development","display_name":"Model driven development","score":0.4103958010673523},{"id":"https://openalex.org/keywords/systems-engineering","display_name":"Systems engineering","score":0.3667374551296234},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3620213270187378},{"id":"https://openalex.org/keywords/unified-modeling-language","display_name":"Unified Modeling Language","score":0.32197070121765137},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.2713935077190399},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.1583009958267212},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.12892159819602966},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.073767751455307}],"concepts":[{"id":"https://openalex.org/C14185376","wikidata":"https://www.wikidata.org/wiki/Q30232","display_name":"Agile software development","level":2,"score":0.8168496489524841},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.730553150177002},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5586487054824829},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.4702640473842621},{"id":"https://openalex.org/C2985178602","wikidata":"https://www.wikidata.org/wiki/Q1941909","display_name":"Model driven development","level":4,"score":0.4103958010673523},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.3667374551296234},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3620213270187378},{"id":"https://openalex.org/C145644426","wikidata":"https://www.wikidata.org/wiki/Q169411","display_name":"Unified Modeling Language","level":3,"score":0.32197070121765137},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2713935077190399},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.1583009958267212},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.12892159819602966},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.073767751455307}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3676536.3676753","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3676536.3676753","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3676536.3676753","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 43rd IEEE/ACM International Conference on Computer-Aided Design","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3676536.3676753","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3676536.3676753","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3676536.3676753","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 43rd IEEE/ACM International Conference on Computer-Aided Design","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.47999998927116394}],"awards":[{"id":"https://openalex.org/G2781527179","display_name":null,"funder_award_id":"62034007","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322927","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4409282707.pdf","grobid_xml":"https://content.openalex.org/works/W4409282707.grobid-xml"},"referenced_works_count":44,"referenced_works":["https://openalex.org/W1982886636","https://openalex.org/W1983394510","https://openalex.org/W2121788702","https://openalex.org/W2233304223","https://openalex.org/W2625954420","https://openalex.org/W2626696598","https://openalex.org/W2886805397","https://openalex.org/W2896457183","https://openalex.org/W2921683824","https://openalex.org/W2970796375","https://openalex.org/W2998600257","https://openalex.org/W3027968530","https://openalex.org/W3037639070","https://openalex.org/W3096609285","https://openalex.org/W3107984262","https://openalex.org/W3169517138","https://openalex.org/W3170647102","https://openalex.org/W3178941450","https://openalex.org/W3197068519","https://openalex.org/W4200145863","https://openalex.org/W4281758439","https://openalex.org/W4293024135","https://openalex.org/W4296973164","https://openalex.org/W4308083752","https://openalex.org/W4309953168","https://openalex.org/W4313597979","https://openalex.org/W4322718191","https://openalex.org/W4360831810","https://openalex.org/W4384648639","https://openalex.org/W4386721862","https://openalex.org/W4386841315","https://openalex.org/W4387797345","https://openalex.org/W4391670689","https://openalex.org/W4392011959","https://openalex.org/W4393578753","https://openalex.org/W6608315494","https://openalex.org/W6630562399","https://openalex.org/W6739901393","https://openalex.org/W6778883912","https://openalex.org/W6804808475","https://openalex.org/W6811340617","https://openalex.org/W6849805803","https://openalex.org/W6857055897","https://openalex.org/W6991685655"],"related_works":["https://openalex.org/W2656997359","https://openalex.org/W4240110559","https://openalex.org/W2094372386","https://openalex.org/W1973385172","https://openalex.org/W4319430762","https://openalex.org/W2294820933","https://openalex.org/W1964871273","https://openalex.org/W2164325773","https://openalex.org/W2111450559","https://openalex.org/W1991038278"],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"have":[4],"revolutionized":[5],"many":[6],"domains":[7],"with":[8],"exceptional":[9],"performance":[10,66],"while":[11],"their":[12,16],"large":[13],"sizes":[14],"hinder":[15],"broad":[17],"applicability,":[18],"especially":[19],"in":[20,112,123,176],"the":[21,34,61,81,95,99,106,113,151,158,168],"edge":[22,178],"computation":[23],"scenarios.":[24],"Designing":[25],"large-scale":[26],"LLM-specific":[27],"accelerators":[28,180],"is":[29,91],"also":[30],"challenging,":[31],"suffering":[32],"from":[33],"complicated,":[35],"cumbersome,":[36],"and":[37,41,68,74,101,124,138,141,162,171,181],"time-consuming":[38],"design,":[39,140,160],"simulation,":[40,161],"optimization":[42,163],"process.":[43],"This":[44],"paper":[45],"meticulously":[46],"proposes":[47],"an":[48,70],"agile":[49,136],"framework":[50,148,175],"for":[51],"accelerator":[52,73,137],"development,":[53],"supporting":[54],"efficient":[55],"LLM":[56,179,183],"inference.":[57,184],"Firstly,":[58],"we":[59],"investigate":[60],"architecture":[62],"of":[63,83,108,173],"LLMs,":[64],"uncover":[65],"bottlenecks,":[67],"design":[69],"optimized":[71],"binarized":[72],"a":[75,87],"configurable":[76],"RISC-V-based":[77],"SoC":[78,139],"to":[79,93,105,149],"boost":[80],"inference":[82,142],"binary":[84],"LLMs.":[85],"Further,":[86],"novel":[88],"fidelity-driven":[89],"method":[90,155],"proposed":[92],"learn":[94],"multi-fidelity":[96],"representation,":[97],"solving":[98],"modeling":[100],"accuracy":[102],"issues":[103],"due":[104],"lack":[107],"accurate":[109],"later-stage":[110],"data":[111],"EDA":[114],"flow,":[115],"by":[116],"capturing":[117],"complex":[118],"relationships":[119],"among":[120],"simulation":[121,143],"metrics":[122],"across":[125,130],"different":[126],"fidelities.":[127],"Tailored":[128],"strategies":[129],"model":[131],"preparation,":[132],"backend":[133],"kernel":[134],"implementations,":[135],"are":[144],"incorporated":[145],"into":[146],"our":[147,174],"refine":[150],"development":[152],"workflow.":[153],"Our":[154],"significantly":[156],"accelerates":[157],"hardware":[159],"processes.":[164],"Experimental":[165],"results":[166],"illustrate":[167],"impressive":[169],"speed":[170],"effectiveness":[172],"designing":[177],"optimizing":[182]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
