{"id":"https://openalex.org/W4312772368","doi":"https://doi.org/10.1109/ijcnn55064.2022.9892797","title":"AxFormer: Accuracy-driven Approximation of Transformers for Faster, Smaller and more Accurate NLP Models","display_name":"AxFormer: Accuracy-driven Approximation of Transformers for Faster, Smaller and more Accurate NLP Models","publication_year":2022,"publication_date":"2022-07-18","ids":{"openalex":"https://openalex.org/W4312772368","doi":"https://doi.org/10.1109/ijcnn55064.2022.9892797"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn55064.2022.9892797","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn55064.2022.9892797","pdf_url":null,"source":{"id":"https://openalex.org/S4363607707","display_name":"2022 International Joint Conference on Neural Networks (IJCNN)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5063948719","display_name":"Amrit Nagarajan","orcid":"https://orcid.org/0000-0002-2847-4721"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Amrit Nagarajan","raw_affiliation_strings":["School of ECE, Purdue University"],"affiliations":[{"raw_affiliation_string":"School of ECE, Purdue University","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021284824","display_name":"Sanchari Sen","orcid":"https://orcid.org/0000-0003-0080-2882"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sanchari Sen","raw_affiliation_strings":["School of ECE, Purdue University"],"affiliations":[{"raw_affiliation_string":"School of ECE, Purdue University","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044634700","display_name":"Jacob R. Stevens","orcid":"https://orcid.org/0000-0002-7626-2846"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jacob R. Stevens","raw_affiliation_strings":["School of ECE, Purdue University"],"affiliations":[{"raw_affiliation_string":"School of ECE, Purdue University","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065766721","display_name":"Anand Raghunathan","orcid":"https://orcid.org/0000-0002-4624-564X"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anand Raghunathan","raw_affiliation_strings":["School of ECE, Purdue University"],"affiliations":[{"raw_affiliation_string":"School of ECE, Purdue University","institution_ids":["https://openalex.org/I219193219"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5063948719"],"corresponding_institution_ids":["https://openalex.org/I219193219"],"apc_list":null,"apc_paid":null,"fwci":0.3116,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.52574876,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8178228139877319},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.7950499057769775},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6103534698486328},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5622398853302002},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5432144403457642},{"id":"https://openalex.org/keywords/parameterized-complexity","display_name":"Parameterized complexity","score":0.5052769780158997},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5011160373687744},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.47508516907691956},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.3325914740562439},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.218910813331604},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.12575533986091614}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8178228139877319},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.7950499057769775},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6103534698486328},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5622398853302002},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5432144403457642},{"id":"https://openalex.org/C165464430","wikidata":"https://www.wikidata.org/wiki/Q1570441","display_name":"Parameterized complexity","level":2,"score":0.5052769780158997},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5011160373687744},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.47508516907691956},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3325914740562439},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.218910813331604},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.12575533986091614},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn55064.2022.9892797","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn55064.2022.9892797","pdf_url":null,"source":{"id":"https://openalex.org/S4363607707","display_name":"2022 International Joint Conference on Neural Networks (IJCNN)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.699999988079071}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306087","display_name":"Semiconductor Research Corporation","ror":"https://ror.org/047z4n946"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W2707890836","https://openalex.org/W2805003733","https://openalex.org/W2896457183","https://openalex.org/W2923014074","https://openalex.org/W2948947170","https://openalex.org/W2963748441","https://openalex.org/W2975381464","https://openalex.org/W2978017171","https://openalex.org/W2980282514","https://openalex.org/W2996428491","https://openalex.org/W2998183051","https://openalex.org/W3006683367","https://openalex.org/W3009043942","https://openalex.org/W3015233032","https://openalex.org/W3015609966","https://openalex.org/W3034340181","https://openalex.org/W3034457371","https://openalex.org/W3101248447","https://openalex.org/W3103754749","https://openalex.org/W3105966348","https://openalex.org/W3176647794","https://openalex.org/W3177265267","https://openalex.org/W4287867774","https://openalex.org/W4288347855","https://openalex.org/W4292779060","https://openalex.org/W6739917289","https://openalex.org/W6751979845","https://openalex.org/W6755207826","https://openalex.org/W6762945437","https://openalex.org/W6768021236","https://openalex.org/W6768080748","https://openalex.org/W6768086466","https://openalex.org/W6768817161","https://openalex.org/W6768851824","https://openalex.org/W6773642575","https://openalex.org/W6774054309","https://openalex.org/W6774743274","https://openalex.org/W6775706467","https://openalex.org/W6778883912","https://openalex.org/W6779124799"],"related_works":["https://openalex.org/W4206178588","https://openalex.org/W3094491777","https://openalex.org/W3214715529","https://openalex.org/W4287635093","https://openalex.org/W4288365749","https://openalex.org/W2936497627","https://openalex.org/W3013624417","https://openalex.org/W4287826556","https://openalex.org/W3098382480","https://openalex.org/W4287598411"],"abstract_inverted_index":{"Transformers":[0,29],"have":[1],"greatly":[2],"advanced":[3],"the":[4,25,104,111,130,135,139],"state-of-the-art":[5],"in":[6,11,38,120],"Natural":[7],"Language":[8],"Processing":[9],"(NLP)":[10],"recent":[12],"years,":[13],"but":[14],"present":[15],"very":[16],"large":[17,36],"computation":[18],"and":[19,42,62,93,100,155,161,181],"storage":[20],"requirements.":[21],"We":[22,65],"observe":[23],"that":[24,54,71,107,147,165,194],"design":[26],"process":[27],"of":[28,103,138],"(pre-train":[30],"a":[31,35,39,68,81],"foundation":[32],"model":[33,131],"on":[34,110,134,159],"dataset":[37],"self-supervised":[40],"manner,":[41],"subsequently":[43],"fine-tune":[44],"it":[45],"for":[46,80],"different":[47],"downstream":[48,83,113],"tasks)":[49],"leads":[50,144],"to":[51,75,145,170,178,183,207],"task-specific":[52],"models":[53,79,146,167],"are":[55,148,168],"highly":[56],"over-parameterized,":[57],"adversely":[58],"impacting":[59],"both":[60],"accuracy":[61],"inference":[63],"efficiency.":[64],"propose":[66],"AxFormer,":[67],"systematic":[69],"framework":[70],"applies":[72],"accuracy-driven":[73,91],"approximations":[74],"create":[76],"optimized":[77],"transformer":[78,106],"given":[82,112],"task.":[84,114],"AxFormer":[85,143,166,195],"combines":[86],"two":[87],"key":[88],"optimizations":[89],"\u2014":[90],"pruning":[92,98],"selective":[94],"hard":[95],"attention.":[96],"Accuracy-driven":[97],"identifies":[99],"removes":[101],"parts":[102,137],"fine-tuned":[105,188],"hinder":[108],"performance":[109],"Sparse":[115],"hard-attention":[116],"optimizes":[117],"attention":[118],"blocks":[119],"selected":[121],"layers":[122],"by":[123],"eliminating":[124],"irrelevant":[125],"word":[126],"aggregations,":[127],"thereby":[128],"helping":[129],"focus":[132],"only":[133],"relevant":[136],"input.":[140],"In":[141,190],"effect,":[142],"more":[149,172],"accurate,":[150,173],"while":[151,174],"also":[152,175],"being":[153,176],"faster":[154,180],"smaller.":[156],"Our":[157],"experiments":[158],"GLUE":[160],"SQUAD":[162],"tasks":[163],"show":[164],"up":[169,177,182],"4.5%":[171],"2.5\u00d7":[179],"3.2\u00d7":[184],"smaller":[185],"than":[186],"conventional":[187],"models.":[189],"addition,":[191],"we":[192],"demonstrate":[193],"can":[196],"be":[197],"combined":[198],"with":[199],"previous":[200],"efforts":[201],"such":[202],"as":[203],"distillation":[204],"or":[205],"quantization":[206],"achieve":[208],"further":[209],"efficiency":[210],"gains.":[211],"Code":[212],"is":[213],"available":[214],"at":[215],"https://github.com/amrnag/Specialized-Transformers.":[216]},"counts_by_year":[{"year":2023,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
