{"id":"https://openalex.org/W4401880492","doi":"https://doi.org/10.1109/vlsitechnologyandcir46783.2024.10631391","title":"A 99.2TOPS/W Transformer Learning Processor with Approximated Attention Score Gradient Computation and Ternary Vector-Based Speculation","display_name":"A 99.2TOPS/W Transformer Learning Processor with Approximated Attention Score Gradient Computation and Ternary Vector-Based Speculation","publication_year":2024,"publication_date":"2024-06-16","ids":{"openalex":"https://openalex.org/W4401880492","doi":"https://doi.org/10.1109/vlsitechnologyandcir46783.2024.10631391"},"language":"en","primary_location":{"id":"doi:10.1109/vlsitechnologyandcir46783.2024.10631391","is_oa":false,"landing_page_url":"https://doi.org/10.1109/vlsitechnologyandcir46783.2024.10631391","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE Symposium on VLSI Technology and Circuits (VLSI Technology and Circuits)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102725563","display_name":"Ping-Sheng Wu","orcid":"https://orcid.org/0000-0003-4484-4048"},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":true,"raw_author_name":"Ping-Sheng Wu","raw_affiliation_strings":["Graduate Institute of Electronics Engineering, National Taiwan University,Taipei,Taiwan"],"affiliations":[{"raw_affiliation_string":"Graduate Institute of Electronics Engineering, National Taiwan University,Taipei,Taiwan","institution_ids":["https://openalex.org/I16733864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107207506","display_name":"Yu\u2010Cheng Lin","orcid":"https://orcid.org/0009-0006-7287-5289"},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Yu-Cheng Lin","raw_affiliation_strings":["Graduate Institute of Electronics Engineering, National Taiwan University,Taipei,Taiwan"],"affiliations":[{"raw_affiliation_string":"Graduate Institute of Electronics Engineering, National Taiwan University,Taipei,Taiwan","institution_ids":["https://openalex.org/I16733864"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101750729","display_name":"Chia\u2010Hsiang Yang","orcid":"https://orcid.org/0000-0003-1163-321X"},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Chia-Hsiang Yang","raw_affiliation_strings":["Graduate Institute of Electronics Engineering, National Taiwan University,Taipei,Taiwan"],"affiliations":[{"raw_affiliation_string":"Graduate Institute of Electronics Engineering, National Taiwan University,Taipei,Taiwan","institution_ids":["https://openalex.org/I16733864"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5102725563"],"corresponding_institution_ids":["https://openalex.org/I16733864"],"apc_list":null,"apc_paid":null,"fwci":1.0245,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.80457705,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"2"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9646999835968018,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9646999835968018,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12676","display_name":"Machine Learning and ELM","score":0.9581999778747559,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10323","display_name":"Analog and Mixed-Signal Circuit Design","score":0.9415000081062317,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/ternary-operation","display_name":"Ternary operation","score":0.7515339851379395},{"id":"https://openalex.org/keywords/speculation","display_name":"Speculation","score":0.7456156611442566},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.6534625887870789},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5178542733192444},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.4878661632537842},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.40204036235809326},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.29844605922698975},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.20305362343788147},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.12859350442886353},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.12231835722923279},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.0655566155910492},{"id":"https://openalex.org/keywords/finance","display_name":"Finance","score":0.06509137153625488},{"id":"https://openalex.org/keywords/economics","display_name":"Economics","score":0.06278577446937561}],"concepts":[{"id":"https://openalex.org/C64452783","wikidata":"https://www.wikidata.org/wiki/Q1524945","display_name":"Ternary operation","level":2,"score":0.7515339851379395},{"id":"https://openalex.org/C47941915","wikidata":"https://www.wikidata.org/wiki/Q107885","display_name":"Speculation","level":2,"score":0.7456156611442566},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.6534625887870789},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5178542733192444},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.4878661632537842},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.40204036235809326},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.29844605922698975},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.20305362343788147},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.12859350442886353},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.12231835722923279},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0655566155910492},{"id":"https://openalex.org/C10138342","wikidata":"https://www.wikidata.org/wiki/Q43015","display_name":"Finance","level":1,"score":0.06509137153625488},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.06278577446937561}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/vlsitechnologyandcir46783.2024.10631391","is_oa":false,"landing_page_url":"https://doi.org/10.1109/vlsitechnologyandcir46783.2024.10631391","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE Symposium on VLSI Technology and Circuits (VLSI Technology and Circuits)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F2461203286","display_name":"National Science and Technology Council","ror":"https://ror.org/02kv4zf79"},{"id":"https://openalex.org/F4320308258","display_name":"Qualcomm","ror":"https://ror.org/002zrf773"},{"id":"https://openalex.org/F4320312995","display_name":"Scripps Research Institute","ror":"https://ror.org/02dxx6824"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":2,"referenced_works":["https://openalex.org/W4230760062","https://openalex.org/W4240861882"],"related_works":["https://openalex.org/W4243792164","https://openalex.org/W2147782221","https://openalex.org/W2076161440","https://openalex.org/W3107943460","https://openalex.org/W2799624154","https://openalex.org/W4250172181","https://openalex.org/W2489870153","https://openalex.org/W3091416521","https://openalex.org/W2003989576","https://openalex.org/W2381171456"],"abstract_inverted_index":{"This":[0],"work":[1],"presents":[2],"the":[3,25,36,64],"first":[4],"Transformer":[5,66],"learning":[6],"processor":[7],"supporting":[8],"both":[9],"inference":[10],"and":[11,21],"training":[12,26],"acceleration.":[13],"Byapplying":[14],"algorithm-architecture":[15],"optimizations,":[16],"including":[17],"approxi-mated":[18],"gradient":[19],"computation":[20],"ternary":[22],"vector-based":[23],"speculation,":[24],"complexity":[27],"is":[28],"reduced":[29],"by":[30,69],"up":[31],"to":[32],"94.2%.":[33],"Adoption":[34],"of":[35,58],"8-bit":[37],"block":[38],"floating-point":[39],"(Block-FP)":[40],"format":[41],"enables":[42],"a":[43,54],"39-to-60%":[44],"power":[45],"reduction":[46],"for":[47],"multiply-accumulate":[48],"(MAC)":[49],"operations.":[50],"The":[51],"chip":[52],"delivers":[53],"peak":[55],"energy":[56],"efficiency":[57],"99.":[59],"<tex":[60,70],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[61,71],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$2\\text{TOPS}/\\mathrm{W}$</tex>,":[62],"outperforming":[63],"state-of-the-art":[65],"inference-only":[67],"processors":[68],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$2.6-\\text{to}-162\\times$</tex>.":[72]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
