{"id":"https://openalex.org/W7125808293","doi":"https://doi.org/10.1109/tetc.2026.3655830","title":"Edge-Optimized Vision Transformers: A Co-Designed Hardware-Software Framework for Efficient Attention Mechanism","display_name":"Edge-Optimized Vision Transformers: A Co-Designed Hardware-Software Framework for Efficient Attention Mechanism","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W7125808293","doi":"https://doi.org/10.1109/tetc.2026.3655830"},"language":null,"primary_location":{"id":"doi:10.1109/tetc.2026.3655830","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tetc.2026.3655830","pdf_url":null,"source":{"id":"https://openalex.org/S2496326734","display_name":"IEEE Transactions on Emerging Topics in Computing","issn_l":"2168-6750","issn":["2168-6750","2376-4562"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Emerging Topics in Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5011721581","display_name":"Nisha Abdul Kareem","orcid":null},"institutions":[{"id":"https://openalex.org/I114845381","display_name":"National Institute of Technology Calicut","ror":"https://ror.org/03yyd7552","country_code":"IN","type":"education","lineage":["https://openalex.org/I114845381"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Nisha Abdul Kareem","raw_affiliation_strings":["Department of Electronics and Communication, National Institute of Technology Calicut, Kozhikode, India"],"raw_orcid":"https://orcid.org/0009-0000-6287-7316","affiliations":[{"raw_affiliation_string":"Department of Electronics and Communication, National Institute of Technology Calicut, Kozhikode, India","institution_ids":["https://openalex.org/I114845381"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Naveen R","orcid":"https://orcid.org/0009-0003-0392-2422"},"institutions":[{"id":"https://openalex.org/I4210145666","display_name":"Embedded Systems (United States)","ror":"https://ror.org/04742eh45","country_code":"US","type":"company","lineage":["https://openalex.org/I4210145666"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Naveen R","raw_affiliation_strings":["Spanidea Systems Pvt Ltd, Bengaluru, India"],"raw_orcid":"https://orcid.org/0009-0003-0392-2422","affiliations":[{"raw_affiliation_string":"Spanidea Systems Pvt Ltd, Bengaluru, India","institution_ids":["https://openalex.org/I4210145666"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068254906","display_name":"Sudhish N. George","orcid":"https://orcid.org/0000-0002-0886-9478"},"institutions":[{"id":"https://openalex.org/I114845381","display_name":"National Institute of Technology Calicut","ror":"https://ror.org/03yyd7552","country_code":"IN","type":"education","lineage":["https://openalex.org/I114845381"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Sudhish N. George","raw_affiliation_strings":["Department of Electronics and Communication, National Institute of Technology Calicut, Kozhikode, India"],"raw_orcid":"https://orcid.org/0000-0002-0886-9478","affiliations":[{"raw_affiliation_string":"Department of Electronics and Communication, National Institute of Technology Calicut, Kozhikode, India","institution_ids":["https://openalex.org/I114845381"]}]},{"author_position":"last","author":{"id":null,"display_name":"Chithira P. R","orcid":"https://orcid.org/0000-0003-1279-6416"},"institutions":[{"id":"https://openalex.org/I2799306724","display_name":"Ramakrishna Mission Vidyamandira","ror":"https://ror.org/019wbnq12","country_code":"IN","type":"other","lineage":["https://openalex.org/I2799306724"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Chithira P. R","raw_affiliation_strings":["Department of Electronics and Communication, Government Engineering College Sreekrishnapuram, Palakkad, India"],"raw_orcid":"https://orcid.org/0000-0003-1279-6416","affiliations":[{"raw_affiliation_string":"Department of Electronics and Communication, Government Engineering College Sreekrishnapuram, Palakkad, India","institution_ids":["https://openalex.org/I2799306724"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5011721581"],"corresponding_institution_ids":["https://openalex.org/I114845381"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.14030432,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"14","issue":"1","first_page":"184","last_page":"195"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.45179998874664307,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.45179998874664307,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11992","display_name":"CCD and CMOS Imaging Sensors","score":0.11949999630451202,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.07190000265836716,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/softmax-function","display_name":"Softmax function","score":0.6723999977111816},{"id":"https://openalex.org/keywords/multiplier","display_name":"Multiplier (economics)","score":0.4885999858379364},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.43720000982284546},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.42719998955726624},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.412200003862381},{"id":"https://openalex.org/keywords/sparse-matrix","display_name":"Sparse matrix","score":0.4074999988079071},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.38850000500679016},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.3817000091075897},{"id":"https://openalex.org/keywords/edge-device","display_name":"Edge device","score":0.35839998722076416}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8425999879837036},{"id":"https://openalex.org/C188441871","wikidata":"https://www.wikidata.org/wiki/Q7554146","display_name":"Softmax function","level":3,"score":0.6723999977111816},{"id":"https://openalex.org/C124584101","wikidata":"https://www.wikidata.org/wiki/Q1053266","display_name":"Multiplier (economics)","level":2,"score":0.4885999858379364},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.43720000982284546},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.42719998955726624},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.412200003862381},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.4074999988079071},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.38850000500679016},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.38690000772476196},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3817000091075897},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3686000108718872},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.36059999465942383},{"id":"https://openalex.org/C138236772","wikidata":"https://www.wikidata.org/wiki/Q25098575","display_name":"Edge device","level":3,"score":0.35839998722076416},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.35670000314712524},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.35420000553131104},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.35269999504089355},{"id":"https://openalex.org/C2778456923","wikidata":"https://www.wikidata.org/wiki/Q5337692","display_name":"Edge computing","level":3,"score":0.3425999879837036},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.3391999900341034},{"id":"https://openalex.org/C2776221188","wikidata":"https://www.wikidata.org/wiki/Q21072556","display_name":"Design space exploration","level":2,"score":0.3325999975204468},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.328900009393692},{"id":"https://openalex.org/C42355184","wikidata":"https://www.wikidata.org/wiki/Q1361088","display_name":"Matrix decomposition","level":3,"score":0.3172999918460846},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3147999942302704},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.31310001015663147},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.3070000112056732},{"id":"https://openalex.org/C65232700","wikidata":"https://www.wikidata.org/wiki/Q5656403","display_name":"Hardware architecture","level":3,"score":0.3021000027656555},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.28619998693466187},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.2827000021934509},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.2815000116825104},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.2809999883174896},{"id":"https://openalex.org/C3770464","wikidata":"https://www.wikidata.org/wiki/Q775963","display_name":"Smoothing","level":2,"score":0.27720001339912415},{"id":"https://openalex.org/C13164978","wikidata":"https://www.wikidata.org/wiki/Q600158","display_name":"Hardware acceleration","level":3,"score":0.2761000096797943},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.26809999346733093},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.26660001277923584},{"id":"https://openalex.org/C150741067","wikidata":"https://www.wikidata.org/wiki/Q2377218","display_name":"Systolic array","level":3,"score":0.26579999923706055}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tetc.2026.3655830","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tetc.2026.3655830","pdf_url":null,"source":{"id":"https://openalex.org/S2496326734","display_name":"IEEE Transactions on Emerging Topics in Computing","issn_l":"2168-6750","issn":["2168-6750","2376-4562"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Emerging Topics in Computing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W2025713906","https://openalex.org/W2963363373","https://openalex.org/W3016832937","https://openalex.org/W3017024317","https://openalex.org/W3048903267","https://openalex.org/W3089646476","https://openalex.org/W3094502228","https://openalex.org/W3138516171","https://openalex.org/W3159727696","https://openalex.org/W3168317852","https://openalex.org/W3206453033","https://openalex.org/W4213019189","https://openalex.org/W4214588794","https://openalex.org/W4280595844","https://openalex.org/W4308479898","https://openalex.org/W4309620166","https://openalex.org/W4312569226","https://openalex.org/W4312847199","https://openalex.org/W4320712824","https://openalex.org/W4360831795","https://openalex.org/W4360832001","https://openalex.org/W4360995299","https://openalex.org/W4362500802","https://openalex.org/W4378804782","https://openalex.org/W4385187240","https://openalex.org/W4387338457","https://openalex.org/W4391468153","https://openalex.org/W4391644593","https://openalex.org/W4394625669","https://openalex.org/W4394829607","https://openalex.org/W4395113676","https://openalex.org/W4404102506"],"related_works":[],"abstract_inverted_index":{"Vision":[0],"Transformers":[1],"(ViTs)":[2],"effectively":[3],"capture":[4],"global":[5],"context":[6],"in":[7,29,101,165,219],"videos/images":[8],"using":[9,176],"attention":[10,21,71],"mechanisms,":[11],"outperforming":[12],"the":[13,20,65,93,122,133,177],"traditional":[14],"Convolutional":[15],"Neural":[16],"Networks":[17],"(CNNs).":[18],"However,":[19],"mechanism's":[22],"dependence":[23],"on":[24,192],"multiple":[25],"matrix":[26,217],"multiplications":[27],"results":[28],"high":[30],"computational":[31],"complexity,":[32],"posing":[33],"a":[34,44,56,85,138,146,183],"significant":[35,214],"challenge":[36],"for":[37,72,89,208],"edge":[38],"computing":[39],"devices.":[40],"This":[41],"paper":[42],"proposes":[43],"hardware-software":[45],"co-design":[46,221],"approach":[47],"to":[48,107,120,161,203],"mitigate":[49],"this":[50],"complexity.":[51],"The":[52,76,171,194],"software":[53],"part":[54],"utilizes":[55],"50%":[57],"sparsity":[58,160],"pattern,":[59],"which":[60,91,156],"doubles":[61],"throughput,":[62],"and":[63,136,168,189,200,226],"replaces":[64],"softmax":[66],"function":[67],"with":[68,205],"linear":[69],"Taylor":[70],"enhanced":[73],"hardware":[74,77],"efficiency.":[75],"design":[78],"introduces":[79],"Recursive":[80,148],"Array":[81,96,149],"Matrix":[82,97,150],"Multiplier":[83,98,151],"(RAMM),":[84],"new":[86],"architecture":[87,196],"tailored":[88],"ViTs,":[90],"outperforms":[92],"conventional":[94],"Systolic":[95],"(SAMM)":[99],"used":[100],"similar":[102],"works.":[103],"RAMM":[104,212],"is":[105,153],"applicable":[106],"any":[108],"deep":[109],"neural":[110],"network":[111],"(DNN)":[112],"architecture,":[113],"but":[114],"since":[115],"ViTs":[116],"are":[117,174],"recent":[118],"models,":[119],"demonstrate":[121],"versatility":[123],"of":[124,186],"RAMM,":[125,204],"we":[126],"integrate":[127],"it":[128],"into":[129],"two":[130],"ViT":[131,135],"variants:":[132],"original":[134],"MaxViT,":[137],"hybrid":[139],"model":[140],"that":[141],"incorporates":[142],"convolution.":[143],"In":[144],"addition,":[145],"Sparse":[147],"(SpaRMM),":[152],"also":[154,190],"designed,":[155],"leverages":[157],"2:4":[158],"structured":[159],"achieve":[162],"additional":[163],"reductions":[164],"processing":[166],"units":[167],"power":[169],"consumption.":[170],"proposed":[172],"architectures":[173],"synthesized":[175],"AMS":[178],"90":[179],"nm":[180],"library":[181],"at":[182],"clock":[184],"frequency":[185],"500":[187],"MHz":[188],"implemented":[191],"FPGA.":[193],"SpaRMM":[195],"achieves":[197],"superior":[198],"latency":[199],"throughput":[201],"compared":[202],"increasing":[206],"efficiency":[207],"higher-order":[209],"matrices.":[210],"Furthermore,":[211],"demonstrates":[213],"improvements":[215],"over":[216],"multipliers":[218],"state-of-the-art":[220],"frameworks,":[222],"such":[223],"as":[224],"SpAtten":[225],"<inline-formula":[227],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[228],"xmlns:xlink=\"http://www.w3.org/1999/xlink\"><tex-math":[229],"notation=\"LaTeX\">$A^{3}$</tex-math></inline-formula>.":[230]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-01-28T00:00:00"}
