{"id":"https://openalex.org/W4380881077","doi":"https://doi.org/10.1145/3579371.3589057","title":"FACT: FFN-Attention Co-optimized Transformer Architecture with Eager Correlation Prediction","display_name":"FACT: FFN-Attention Co-optimized Transformer Architecture with Eager Correlation Prediction","publication_year":2023,"publication_date":"2023-06-16","ids":{"openalex":"https://openalex.org/W4380881077","doi":"https://doi.org/10.1145/3579371.3589057"},"language":"en","primary_location":{"id":"doi:10.1145/3579371.3589057","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3579371.3589057","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3579371.3589057","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 50th Annual International Symposium on Computer Architecture","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3579371.3589057","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5071432313","display_name":"Yubin Qin","orcid":"https://orcid.org/0000-0001-5530-5416"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yubin Qin","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-5530-5416","affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021484887","display_name":"Yang Wang","orcid":"https://orcid.org/0000-0002-8293-8881"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Wang","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-8293-8881","affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037648551","display_name":"Dazheng Deng","orcid":"https://orcid.org/0009-0006-4229-4985"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dazheng Deng","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0006-4229-4985","affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101935571","display_name":"Zhiren Zhao","orcid":"https://orcid.org/0009-0007-6074-7324"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiren Zhao","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0007-6074-7324","affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101975562","display_name":"Xiaolong Yang","orcid":"https://orcid.org/0009-0003-0105-8351"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaolong Yang","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0003-0105-8351","affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100358856","display_name":"Leibo Liu","orcid":"https://orcid.org/0000-0001-7548-4116"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Leibo Liu","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-7548-4116","affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036023084","display_name":"Shaojun Wei","orcid":"https://orcid.org/0000-0001-5117-7920"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shaojun Wei","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-5117-7920","affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066434078","display_name":"Yang Hu","orcid":"https://orcid.org/0000-0001-6942-4395"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Hu","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-6942-4395","affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5054524841","display_name":"Shouyi Yin","orcid":"https://orcid.org/0000-0003-2309-572X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shouyi Yin","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-2309-572X","affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5071432313"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":11.0682,"has_fulltext":true,"cited_by_count":94,"citation_normalized_percentile":{"value":0.98985931,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"14"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11992","display_name":"CCD and CMOS Imaging Sensors","score":0.9923999905586243,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7864419221878052},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.7534060478210449},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5769495964050293},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5698620080947876},{"id":"https://openalex.org/keywords/memory-footprint","display_name":"Memory footprint","score":0.5448129773139954},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.446415513753891},{"id":"https://openalex.org/keywords/efficient-energy-use","display_name":"Efficient energy use","score":0.4280526638031006},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3223370313644409},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3062666058540344},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08791995048522949}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7864419221878052},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.7534060478210449},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5769495964050293},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5698620080947876},{"id":"https://openalex.org/C74912251","wikidata":"https://www.wikidata.org/wiki/Q6815727","display_name":"Memory footprint","level":2,"score":0.5448129773139954},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.446415513753891},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.4280526638031006},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3223370313644409},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3062666058540344},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08791995048522949},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3579371.3589057","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3579371.3589057","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3579371.3589057","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 50th Annual International Symposium on Computer Architecture","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3579371.3589057","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3579371.3589057","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3579371.3589057","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 50th Annual International Symposium on Computer Architecture","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.9100000262260437,"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy"}],"awards":[{"id":"https://openalex.org/G338081660","display_name":null,"funder_award_id":"62125403","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3483935958","display_name":null,"funder_award_id":"Grant 62125403","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5146812782","display_name":null,"funder_award_id":"Grant 2021ZD0114400","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G7028138989","display_name":null,"funder_award_id":"2021ZD0114400","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G8284251335","display_name":null,"funder_award_id":"Grant Z221100007722023","funder_id":"https://openalex.org/F4320325902","funder_display_name":"Beijing Municipal Science and Technology Commission"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320325902","display_name":"Beijing Municipal Science and Technology Commission","ror":null},{"id":"https://openalex.org/F4320329777","display_name":"Beijing National Research Center For Information Science And Technology","ror":null},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null},{"id":"https://openalex.org/F4320335791","display_name":"Beijing Advanced Innovation Center for Intelligent Robots and Systems, Beijing Institute of Technology","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4380881077.pdf","grobid_xml":"https://content.openalex.org/works/W4380881077.grobid-xml"},"referenced_works_count":30,"referenced_works":["https://openalex.org/W2108598243","https://openalex.org/W2560674852","https://openalex.org/W2747329762","https://openalex.org/W2963341956","https://openalex.org/W2963748441","https://openalex.org/W2980113464","https://openalex.org/W2981852735","https://openalex.org/W2984100107","https://openalex.org/W3017024317","https://openalex.org/W3105802176","https://openalex.org/W3109309915","https://openalex.org/W3138516171","https://openalex.org/W3157114665","https://openalex.org/W3172942063","https://openalex.org/W3185702163","https://openalex.org/W3200664681","https://openalex.org/W4206223617","https://openalex.org/W4214686755","https://openalex.org/W4224267386","https://openalex.org/W4233743233","https://openalex.org/W4239088979","https://openalex.org/W4245659846","https://openalex.org/W4280557024","https://openalex.org/W4287887264","https://openalex.org/W4288089799","https://openalex.org/W4291653336","https://openalex.org/W4312349930","https://openalex.org/W4313033857","https://openalex.org/W4313467238","https://openalex.org/W4320722432"],"related_works":["https://openalex.org/W2055243143","https://openalex.org/W4206178588","https://openalex.org/W3094491777","https://openalex.org/W3214715529","https://openalex.org/W4287635093","https://openalex.org/W3094340691","https://openalex.org/W4221166601","https://openalex.org/W4396520111","https://openalex.org/W2929170389","https://openalex.org/W4300097863"],"abstract_inverted_index":{"Transformer":[0,37,210,250],"model":[1,38],"is":[2,45,255],"becoming":[3],"prevalent":[4],"in":[5,35,79,116,177,186],"various":[6],"AI":[7],"applications":[8],"with":[9,74,123,142,190],"its":[10,23],"outstanding":[11],"performance.":[12],"However,":[13],"the":[14,30,46,54,59,104,113,124,130,147,161,171,187,205,208,214,248],"high":[15],"cost":[16],"of":[17,93,150,163,207,244],"computation":[18,33,115,175],"and":[19,49,119,174,222,236],"memory":[20,183],"footprint":[21],"make":[22],"inference":[24],"inefficient.":[25],"We":[26,95],"discover":[27],"that":[28,51,169,201],"among":[29],"three":[31,91,143],"main":[32],"modules":[34,92],"a":[36,191],"(QKV":[39],"generation,":[40],"attention":[41,60,105],"computation,":[42,61],"FFN),":[43],"it":[44],"QKV":[47,108,117],"generation":[48,118],"FFN":[50,122,189],"contribute":[52],"to":[53,137,230],"most":[55,64],"power":[56,70],"cost.":[57],"While":[58],"focused":[62],"by":[63,152,211],"previous":[65,231],"works,":[66],"only":[67],"has":[68],"decent":[69],"share":[71],"when":[72,226],"dealing":[73],"extremely":[75],"long":[76],"inputs.":[77],"Therefore,":[78],"this":[80],"paper,":[81],"we":[82,133],"propose":[83,97,134],"FACT,":[84],"an":[85,98,166,219,241],"efficient":[86],"algorithm-hardware":[87],"co-design":[88],"optimizing":[89],"all":[90],"Transformer.":[94],"first":[96],"eager":[99,140,172],"prediction":[100,141,151,164,173],"algorithm":[101],"which":[102,127,254],"predicts":[103],"matrix":[106],"before":[107],"generation.":[109],"It":[110,145,159,180,217],"further":[111],"detects":[112],"unnecessary":[114],"assigns":[120],"mixed-precision":[121,188],"predicted":[125],"attention,":[126,228],"helps":[128],"improve":[129],"throughput.":[131],"Further,":[132,238],"FACT":[135,203,239],"accelerator":[136],"efficiently":[138],"support":[139],"designs.":[144],"avoids":[146,182],"large":[148],"overhead":[149],"using":[153],"log-based":[154],"add-only":[155],"operations":[156],"for":[157],"prediction.":[158],"eliminates":[160],"latency":[162],"through":[165],"out-of-order":[167],"scheduler":[168],"makes":[170],"work":[176],"full":[178],"pipeline.":[179],"additionally":[181],"access":[184],"conflict":[185],"novel":[192],"diagonal":[193],"storage":[194],"pattern.":[195],"Experiments":[196],"on":[197,213,252],"22":[198],"benchmarks":[199],"show":[200],"our":[202],"improves":[204],"throughput":[206],"whole":[209,249],"3.59\u00d7":[212],"geomean":[215],"average.":[216],"achieves":[218,240],"enviable":[220],"47.64\u00d7":[221],"278.1\u00d7":[223],"energy":[224,242],"saving":[225],"computing":[227],"compared":[229],"attention-optimization-only":[232],"SOTA":[233],"works":[234],"ELSA":[235],"Sanger.":[237],"efficiency":[243],"4388":[245],"GOPS/W":[246],"performing":[247],"layer":[251],"average,":[253],"94.98\u00d7":[256],"higher":[257],"than":[258],"Nvidia":[259],"V100":[260],"GPU.":[261]},"counts_by_year":[{"year":2026,"cited_by_count":7},{"year":2025,"cited_by_count":58},{"year":2024,"cited_by_count":28},{"year":2023,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
