{"id":"https://openalex.org/W4416250786","doi":"https://doi.org/10.1109/ijcnn64981.2025.11228292","title":"SpaRTAN: Spatial Reinforcement Token-based Aggregation Network for Visual Recognition","display_name":"SpaRTAN: Spatial Reinforcement Token-based Aggregation Network for Visual Recognition","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4416250786","doi":"https://doi.org/10.1109/ijcnn64981.2025.11228292"},"language":null,"primary_location":{"id":"doi:10.1109/ijcnn64981.2025.11228292","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11228292","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5119896827","display_name":"Quan Bi Pay","orcid":null},"institutions":[{"id":"https://openalex.org/I11662577","display_name":"Monash University Malaysia","ror":"https://ror.org/00yncr324","country_code":"MY","type":"education","lineage":["https://openalex.org/I11662577"]}],"countries":["MY"],"is_corresponding":true,"raw_author_name":"Quan Bi Pay","raw_affiliation_strings":["Monash University Malaysia,School of Information Technology"],"affiliations":[{"raw_affiliation_string":"Monash University Malaysia,School of Information Technology","institution_ids":["https://openalex.org/I11662577"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023680401","display_name":"Vishnu Monn Baskaran","orcid":"https://orcid.org/0000-0001-6809-5817"},"institutions":[{"id":"https://openalex.org/I11662577","display_name":"Monash University Malaysia","ror":"https://ror.org/00yncr324","country_code":"MY","type":"education","lineage":["https://openalex.org/I11662577"]}],"countries":["MY"],"is_corresponding":false,"raw_author_name":"Vishnu Monn Baskaran","raw_affiliation_strings":["Monash University Malaysia,School of Information Technology"],"affiliations":[{"raw_affiliation_string":"Monash University Malaysia,School of Information Technology","institution_ids":["https://openalex.org/I11662577"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045821623","display_name":"Junn Yong Loo","orcid":"https://orcid.org/0000-0001-9370-600X"},"institutions":[{"id":"https://openalex.org/I11662577","display_name":"Monash University Malaysia","ror":"https://ror.org/00yncr324","country_code":"MY","type":"education","lineage":["https://openalex.org/I11662577"]}],"countries":["MY"],"is_corresponding":false,"raw_author_name":"Junn Yong Loo","raw_affiliation_strings":["Monash University Malaysia,School of Information Technology"],"affiliations":[{"raw_affiliation_string":"Monash University Malaysia,School of Information Technology","institution_ids":["https://openalex.org/I11662577"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069916756","display_name":"KokSheik Wong","orcid":"https://orcid.org/0000-0002-4893-2291"},"institutions":[{"id":"https://openalex.org/I11662577","display_name":"Monash University Malaysia","ror":"https://ror.org/00yncr324","country_code":"MY","type":"education","lineage":["https://openalex.org/I11662577"]}],"countries":["MY"],"is_corresponding":false,"raw_author_name":"KokSheik Wong","raw_affiliation_strings":["Monash University Malaysia,School of Information Technology"],"affiliations":[{"raw_affiliation_string":"Monash University Malaysia,School of Information Technology","institution_ids":["https://openalex.org/I11662577"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077539496","display_name":"Simon See","orcid":"https://orcid.org/0000-0002-4958-9237"},"institutions":[{"id":"https://openalex.org/I1304085615","display_name":"Nvidia (United Kingdom)","ror":"https://ror.org/02kr42612","country_code":"GB","type":"company","lineage":["https://openalex.org/I1304085615","https://openalex.org/I4210127875"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Simon See","raw_affiliation_strings":["NVIDIA AI Technology Center"],"affiliations":[{"raw_affiliation_string":"NVIDIA AI Technology Center","institution_ids":["https://openalex.org/I1304085615"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5119896827"],"corresponding_institution_ids":["https://openalex.org/I11662577"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.37360658,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"9"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.8912000060081482,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.8912000060081482,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.014299999922513962,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12702","display_name":"Brain Tumor Detection and Classification","score":0.006800000090152025,"subfield":{"id":"https://openalex.org/subfields/2808","display_name":"Neurology"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.7809000015258789},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.5651000142097473},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4729999899864197},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.4562000036239624},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.43140000104904175},{"id":"https://openalex.org/keywords/spatial-analysis","display_name":"Spatial analysis","score":0.3659999966621399},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.35670000314712524},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.3549000024795532},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.32690000534057617}],"concepts":[{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.7809000015258789},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7444000244140625},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6164000034332275},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.5651000142097473},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4729999899864197},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.4562000036239624},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.43140000104904175},{"id":"https://openalex.org/C159620131","wikidata":"https://www.wikidata.org/wiki/Q1938983","display_name":"Spatial analysis","level":2,"score":0.3659999966621399},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.35670000314712524},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.3549000024795532},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.34689998626708984},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.32690000534057617},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.31869998574256897},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.3098999857902527},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.3068000078201294},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.3028999865055084},{"id":"https://openalex.org/C2984784707","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Digit recognition","level":3,"score":0.2883000075817108},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.2856000065803528},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.2808000147342682},{"id":"https://openalex.org/C157899210","wikidata":"https://www.wikidata.org/wiki/Q1395022","display_name":"Convolutional code","level":3,"score":0.27090001106262207},{"id":"https://openalex.org/C10689553","wikidata":"https://www.wikidata.org/wiki/Q405953","display_name":"Spartan","level":3,"score":0.26570001244544983},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.2615000009536743},{"id":"https://openalex.org/C75294576","wikidata":"https://www.wikidata.org/wiki/Q5165192","display_name":"Contextual image classification","level":3,"score":0.26109999418258667},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.26089999079704285},{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.2606000006198883},{"id":"https://openalex.org/C189950617","wikidata":"https://www.wikidata.org/wiki/Q937228","display_name":"Property (philosophy)","level":2,"score":0.25760000944137573},{"id":"https://openalex.org/C21780288","wikidata":"https://www.wikidata.org/wiki/Q5139731","display_name":"Coco","level":2,"score":0.25519999861717224},{"id":"https://openalex.org/C142575187","wikidata":"https://www.wikidata.org/wiki/Q3358290","display_name":"Pyramid (geometry)","level":2,"score":0.2535000145435333},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.25029999017715454}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn64981.2025.11228292","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11228292","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W2108598243","https://openalex.org/W2194775991","https://openalex.org/W2752782242","https://openalex.org/W2962858109","https://openalex.org/W2963163009","https://openalex.org/W3131500599","https://openalex.org/W3138516171","https://openalex.org/W3163465952","https://openalex.org/W4226334005","https://openalex.org/W4312312588","https://openalex.org/W4312423415","https://openalex.org/W4312443924","https://openalex.org/W4312560592","https://openalex.org/W4312853765","https://openalex.org/W4385245566","https://openalex.org/W4385346076","https://openalex.org/W4386076083","https://openalex.org/W4390872670","https://openalex.org/W4396918399","https://openalex.org/W4402351819","https://openalex.org/W4402754006"],"related_works":[],"abstract_inverted_index":{"The":[0,207],"resurgence":[1],"of":[2],"convolutional":[3],"neural":[4],"networks":[5],"(CNNs)":[6],"in":[7,56,144],"visual":[8],"recognition":[9],"tasks,":[10],"exemplified":[11],"by":[12,100,201],"ConvNeXt,":[13],"has":[14],"demonstrated":[15],"their":[16],"capability":[17],"to":[18,54,70,106,181],"rival":[19],"transformer-based":[20],"architectures":[21],"through":[22,185],"advanced":[23],"training":[24],"methodologies":[25],"and":[26,33,88,103,120,137,146,174],"ViTinspired":[27],"design":[28,84],"principles.":[29],"However,":[30],"both":[31],"CNNs":[32,48],"transformers":[34],"exhibit":[35],"a":[36,81],"simplicity":[37],"bias,":[38],"favoring":[39],"straightforward":[40],"features":[41,111],"over":[42],"complex":[43],"structural":[44],"representations.":[45],"Furthermore,":[46],"modern":[47],"often":[49],"integrate":[50],"MLP-like":[51],"blocks":[52,60],"akin":[53],"those":[55],"transformers,":[57],"but":[58],"these":[59,76],"suffer":[61],"from":[62],"significant":[63],"information":[64,90],"redundancies,":[65],"necessitating":[66],"high":[67],"expansion":[68],"ratios":[69],"sustain":[71],"competitive":[72,157],"performance.":[73,158],"To":[74],"address":[75],"limitations,":[77],"we":[78],"propose":[79],"SpaRTAN,":[80],"lightweight":[82],"architectural":[83],"that":[85,149],"enhances":[86],"spatial":[87,110],"channel-wise":[89,125],"processing.":[91],"SpaRTAN":[92,150,165],"employs":[93],"kernels":[94],"with":[95,170,203],"varying":[96],"receptive":[97],"fields,":[98],"controlled":[99],"kernel":[101],"size":[102],"dilation":[104],"factor,":[105],"capture":[107],"discriminative":[108,140],"multi-order":[109],"effectively.":[112],"A":[113],"wave-based":[114],"channel":[115],"aggregation":[116],"module":[117],"further":[118],"modulates":[119],"reinforces":[121],"pixel":[122],"interactions,":[123],"mitigating":[124],"redundancies.":[126],"Combining":[127],"the":[128,131,162,190,198],"two":[129],"modules,":[130],"proposed":[132],"network":[133],"can":[134],"efficiently":[135],"gather":[136],"dynamically":[138],"contextualize":[139],"features.":[141],"Experimental":[142],"results":[143],"ImageNet":[145],"COCO":[147,191],"demonstrate":[148],"achieves":[151,166,194],"remarkable":[152],"parameter":[153],"efficiency":[154],"while":[155],"maintaining":[156],"In":[159],"particular,":[160],"on":[161],"ImageNet-1k":[163],"benchmark,":[164,192],"77.":[167],"7%":[168],"accuracy":[169],"only":[171,204],"3.8M":[172],"parameters":[173],"approximately":[175],"1.0":[176],"GFLOPs,":[177],"demonstrating":[178],"its":[179],"ability":[180],"deliver":[182],"strong":[183],"performance":[184],"an":[186],"efficient":[187],"design.":[188],"On":[189],"it":[193],"50.0%":[195],"AP,":[196],"surpassing":[197],"previous":[199],"benchmark":[200],"1.2%":[202],"21.5M":[205],"parameters.":[206],"code":[208],"is":[209],"publicly":[210],"available":[211],"at":[212],"[https://github.com/henry-pay/SpaRTAN].":[213]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-14T00:00:00"}
