{"id":"https://openalex.org/W4395073472","doi":"https://doi.org/10.1145/3620666.3651368","title":"8-bit Transformer Inference and Fine-tuning for Edge Accelerators","display_name":"8-bit Transformer Inference and Fine-tuning for Edge Accelerators","publication_year":2024,"publication_date":"2024-04-24","ids":{"openalex":"https://openalex.org/W4395073472","doi":"https://doi.org/10.1145/3620666.3651368"},"language":"en","primary_location":{"id":"doi:10.1145/3620666.3651368","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3620666.3651368","pdf_url":null,"source":null,"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 3","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102968925","display_name":"Jeffrey Yu","orcid":"https://orcid.org/0000-0001-9643-7490"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jeffrey Yu","raw_affiliation_strings":["Electrical Engineering, Stanford University, Stanford, CA, United States of America"],"raw_orcid":"https://orcid.org/0000-0001-9643-7490","affiliations":[{"raw_affiliation_string":"Electrical Engineering, Stanford University, Stanford, CA, United States of America","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062937114","display_name":"Kartik Prabhu","orcid":"https://orcid.org/0000-0002-4179-1692"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kartik Prabhu","raw_affiliation_strings":["Electrical Engineering, Stanford University, Stanford, CA, United States of America"],"raw_orcid":"https://orcid.org/0000-0002-4179-1692","affiliations":[{"raw_affiliation_string":"Electrical Engineering, Stanford University, Stanford, CA, United States of America","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5095854318","display_name":"Yonatan Urman","orcid":"https://orcid.org/0000-0002-5763-8174"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yonatan Urman","raw_affiliation_strings":["Electrical Engineering, Stanford University, Stanford, CA, USA"],"raw_orcid":"https://orcid.org/0000-0002-5763-8174","affiliations":[{"raw_affiliation_string":"Electrical Engineering, Stanford University, Stanford, CA, USA","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033289364","display_name":"Robert M. Radway","orcid":"https://orcid.org/0000-0003-3393-5489"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Robert M. Radway","raw_affiliation_strings":["Electrical Engineering, Stanford University, Stanford, CA, USA"],"raw_orcid":"https://orcid.org/0000-0003-3393-5489","affiliations":[{"raw_affiliation_string":"Electrical Engineering, Stanford University, Stanford, CA, USA","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5095854319","display_name":"Eric Han","orcid":"https://orcid.org/0009-0009-0528-8318"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Eric Han","raw_affiliation_strings":["Electrical Engineering, Stanford University, Stanford, CA, USA"],"raw_orcid":"https://orcid.org/0009-0009-0528-8318","affiliations":[{"raw_affiliation_string":"Electrical Engineering, Stanford University, Stanford, CA, USA","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5029554261","display_name":"Priyanka Raina","orcid":"https://orcid.org/0000-0002-8834-8663"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Priyanka Raina","raw_affiliation_strings":["Electrical Engineering, Stanford University, Stanford, CA, United States of America"],"raw_orcid":"https://orcid.org/0000-0002-8834-8663","affiliations":[{"raw_affiliation_string":"Electrical Engineering, Stanford University, Stanford, CA, United States of America","institution_ids":["https://openalex.org/I97018004"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5102968925"],"corresponding_institution_ids":["https://openalex.org/I97018004"],"apc_list":null,"apc_paid":null,"fwci":3.8055,"has_fulltext":false,"cited_by_count":19,"citation_normalized_percentile":{"value":0.93790773,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"5","last_page":"21"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11992","display_name":"CCD and CMOS Imaging Sensors","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11992","display_name":"CCD and CMOS Imaging Sensors","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11044","display_name":"Particle Detector Development and Performance","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/3106","display_name":"Nuclear and High Energy Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10323","display_name":"Analog and Mixed-Signal Circuit Design","score":0.9944999814033508,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6170347332954407},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6036630868911743},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.538855254650116},{"id":"https://openalex.org/keywords/enhanced-data-rates-for-gsm-evolution","display_name":"Enhanced Data Rates for GSM Evolution","score":0.4938870072364807},{"id":"https://openalex.org/keywords/bit","display_name":"Bit (key)","score":0.4164485037326813},{"id":"https://openalex.org/keywords/electronic-engineering","display_name":"Electronic engineering","score":0.3898658752441406},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.30883824825286865},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.14359289407730103},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.14306464791297913},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.13779684901237488},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.12336468696594238}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6170347332954407},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6036630868911743},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.538855254650116},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.4938870072364807},{"id":"https://openalex.org/C117011727","wikidata":"https://www.wikidata.org/wiki/Q1278488","display_name":"Bit (key)","level":2,"score":0.4164485037326813},{"id":"https://openalex.org/C24326235","wikidata":"https://www.wikidata.org/wiki/Q126095","display_name":"Electronic engineering","level":1,"score":0.3898658752441406},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.30883824825286865},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.14359289407730103},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.14306464791297913},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.13779684901237488},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.12336468696594238}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3620666.3651368","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3620666.3651368","pdf_url":null,"source":null,"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 3","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6899999976158142,"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1821462560","https://openalex.org/W2743322459","https://openalex.org/W2763421725","https://openalex.org/W2803113791","https://openalex.org/W2899063892","https://openalex.org/W2923014074","https://openalex.org/W2963341956","https://openalex.org/W2963748441","https://openalex.org/W2965373594","https://openalex.org/W2981852735","https://openalex.org/W3009434609","https://openalex.org/W3016829100","https://openalex.org/W3034457371","https://openalex.org/W3096609285","https://openalex.org/W3138516171","https://openalex.org/W3145450063","https://openalex.org/W3158073526","https://openalex.org/W3168867926","https://openalex.org/W3183327882","https://openalex.org/W3196851536","https://openalex.org/W4214893857","https://openalex.org/W4226353746","https://openalex.org/W4287117014","https://openalex.org/W4288089799","https://openalex.org/W4292779060","https://openalex.org/W4297812065","https://openalex.org/W4309591680","https://openalex.org/W4381713178","https://openalex.org/W4385245566","https://openalex.org/W6759579507"],"related_works":["https://openalex.org/W2411923897","https://openalex.org/W4394546135","https://openalex.org/W4285347720","https://openalex.org/W4200259850","https://openalex.org/W2333831899","https://openalex.org/W2484894494","https://openalex.org/W4206178588","https://openalex.org/W3094491777","https://openalex.org/W3214715529","https://openalex.org/W4287635093"],"abstract_inverted_index":{"Transformer":[0,58,78],"models":[1],"achieve":[2],"state-of-the-art":[3],"accuracy":[4],"on":[5,30],"natural":[6],"language":[7],"processing":[8],"(NLP)":[9],"and":[10,17,27,46,65,90],"vision":[11],"tasks,":[12],"but":[13,60,80],"demand":[14],"significant":[15],"computation":[16,45],"memory":[18,47],"resources,":[19],"which":[20],"makes":[21],"it":[22],"difficult":[23],"to":[24,34,43,87],"perform":[25],"inference":[26],"training":[28],"(fine-tuning)":[29],"edge":[31],"accelerators.":[32],"Quantization":[33],"lower":[35],"precision":[36,64],"data":[37],"types":[38],"is":[39],"a":[40],"promising":[41],"way":[42],"reduce":[44],"resources.":[48],"Prior":[49],"work":[50,82],"has":[51,74],"employed":[52],"8-bit":[53,70],"integer":[54],"(int8)":[55],"quantization":[56,73],"for":[57,68,77],"inference,":[59],"int8":[61],"lacks":[62],"the":[63,85,92,95],"range":[66],"required":[67],"training.":[69],"floating-point":[71],"(FP8)":[72],"been":[75],"used":[76],"training,":[79],"prior":[81],"only":[83],"quantizes":[84],"inputs":[86],"matrix":[88],"multiplications":[89],"leaves":[91],"rest":[93],"of":[94],"operations":[96],"in":[97],"high":[98],"precision.":[99]},"counts_by_year":[{"year":2026,"cited_by_count":5},{"year":2025,"cited_by_count":14}],"updated_date":"2025-12-26T23:08:49.675405","created_date":"2025-10-10T00:00:00"}
