{"id":"https://openalex.org/W7147477145","doi":"https://doi.org/10.1109/access.2026.3679674","title":"Architectural Scalability and Attention Stabilization for Robust Impulsive Acoustic Spectrogram Classification Using Swin Transformer V2","display_name":"Architectural Scalability and Attention Stabilization for Robust Impulsive Acoustic Spectrogram Classification Using Swin Transformer V2","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W7147477145","doi":"https://doi.org/10.1109/access.2026.3679674"},"language":"en","primary_location":{"id":"doi:10.1109/access.2026.3679674","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2026.3679674","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1109/access.2026.3679674","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5085053950","display_name":"Pafan Doungpaisan","orcid":"https://orcid.org/0000-0002-8969-2984"},"institutions":[{"id":"https://openalex.org/I82828225","display_name":"King Mongkut's University of Technology North Bangkok","ror":"https://ror.org/04fy6jb97","country_code":"TH","type":"education","lineage":["https://openalex.org/I82828225"]}],"countries":["TH"],"is_corresponding":true,"raw_author_name":"Pafan Doungpaisan","raw_affiliation_strings":["Faculty of Industrial Technology and Management, King Mongkut&#x2019;s University of Technology North Bangkok (KMUTNB), Bangkok, Thailand"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Industrial Technology and Management, King Mongkut&#x2019;s University of Technology North Bangkok (KMUTNB), Bangkok, Thailand","institution_ids":["https://openalex.org/I82828225"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5063844057","display_name":"Peerapol Khunarsa","orcid":"https://orcid.org/0000-0002-4252-078X"},"institutions":[{"id":"https://openalex.org/I176205391","display_name":"Uttaradit Rajabhat University","ror":"https://ror.org/01rs03g07","country_code":"TH","type":"education","lineage":["https://openalex.org/I176205391"]}],"countries":["TH"],"is_corresponding":false,"raw_author_name":"Peerapol Khunarsa","raw_affiliation_strings":["Faculty of Science and Technology, Uttaradit Rajabhat University, Uttaradit, Thailand"],"raw_orcid":"https://orcid.org/0000-0002-4252-078X","affiliations":[{"raw_affiliation_string":"Faculty of Science and Technology, Uttaradit Rajabhat University, Uttaradit, Thailand","institution_ids":["https://openalex.org/I176205391"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5085053950"],"corresponding_institution_ids":["https://openalex.org/I82828225"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.61305616,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"14","issue":null,"first_page":"51558","last_page":"51575"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.295199990272522,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.295199990272522,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12205","display_name":"Time Series Analysis and Forecasting","score":0.07729999721050262,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10220","display_name":"Machine Fault Diagnosis Techniques","score":0.04809999838471413,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.9463000297546387},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6610999703407288},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.5321000218391418},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.46880000829696655},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.46299999952316284},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.391400009393692},{"id":"https://openalex.org/keywords/broadband","display_name":"Broadband","score":0.35749998688697815},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.35280001163482666}],"concepts":[{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.9463000297546387},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7387999892234802},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6610999703407288},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.5321000218391418},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.46880000829696655},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.46299999952316284},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4453999996185303},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4041999876499176},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.391400009393692},{"id":"https://openalex.org/C509933004","wikidata":"https://www.wikidata.org/wiki/Q194163","display_name":"Broadband","level":2,"score":0.35749998688697815},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.35280001163482666},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.33489999175071716},{"id":"https://openalex.org/C102519508","wikidata":"https://www.wikidata.org/wiki/Q6520159","display_name":"Fourier transform","level":2,"score":0.32679998874664307},{"id":"https://openalex.org/C134652429","wikidata":"https://www.wikidata.org/wiki/Q1052698","display_name":"Jitter","level":2,"score":0.3192000091075897},{"id":"https://openalex.org/C142433447","wikidata":"https://www.wikidata.org/wiki/Q7806653","display_name":"Time\u2013frequency analysis","level":3,"score":0.2906999886035919},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.28870001435279846},{"id":"https://openalex.org/C166386157","wikidata":"https://www.wikidata.org/wiki/Q1477735","display_name":"Short-time Fourier transform","level":4,"score":0.2831000089645386},{"id":"https://openalex.org/C75172450","wikidata":"https://www.wikidata.org/wiki/Q623950","display_name":"Fast Fourier transform","level":2,"score":0.2791999876499176},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.27410000562667847},{"id":"https://openalex.org/C198386975","wikidata":"https://www.wikidata.org/wiki/Q117785","display_name":"Finite impulse response","level":2,"score":0.25029999017715454}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2026.3679674","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2026.3679674","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:a26b21b883ee43a5bdd1efbc0a46884a","is_oa":true,"landing_page_url":"https://doaj.org/article/a26b21b883ee43a5bdd1efbc0a46884a","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 14, Pp 51558-51575 (2026)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2026.3679674","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2026.3679674","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Impulsive":[0],"acoustic":[1,64,182,197],"event":[2],"classification":[3,136],"based":[4],"on":[5,34],"spectrogram":[6,65,146,164],"representations":[7],"remains":[8],"challenging":[9],"due":[10],"to":[11],"the":[12,41,50,150,170,188],"non-stationary":[13],"and":[14,53,103,115,138,160,184],"broadband":[15],"characteristics":[16],"of":[17,43,57,172,191],"transient":[18],"signals.":[19],"While":[20],"convolutional":[21,36],"neural":[22],"networks":[23],"(CNNs)":[24],"have":[25],"been":[26],"widely":[27,106],"adopted":[28],"for":[29,61,175,195],"spectrogram-based":[30,196],"classification,":[31],"their":[32],"reliance":[33],"local":[35],"receptive":[37],"fields":[38],"may":[39],"limit":[40],"modeling":[42,176],"long-range":[44],"spectro\u2013temporal":[45,178],"dependencies.":[46],"This":[47],"study":[48],"investigates":[49],"architectural":[51,189],"scalability":[52],"attention":[54,152],"stabilization":[55],"properties":[56],"Swin":[58,98,130,192],"Transformer":[59,99,131,193],"V2":[60,132],"robust":[62],"impulsive":[63,181],"classification.":[66,198],"A":[67],"comprehensive":[68],"experimental":[69],"framework":[70],"is":[71],"developed":[72],"using":[73],"twelve":[74],"time\u2013frequency":[75],"representations,":[76],"including":[77,111],"Bark,":[78],"Chromagram,":[79],"Cochleagram,":[80],"Constant-Q":[81],"Transform":[82,93],"(CQT),":[83],"Delta,":[84],"Delta-Delta,":[85],"Log-Mel,":[86],"Mel,":[87],"MFCC,":[88],"Spectral":[89],"Contrast,":[90],"Short-Time":[91],"Fourier":[92],"(STFT),":[94],"andWavelet":[95],"spectrograms.":[96],"Multiple":[97],"configurations":[100],"are":[101],"evaluated":[102],"compared":[104],"with":[105,122],"used":[107],"CNN":[108,142],"baseline":[109],"architectures,":[110],"ResNet50,":[112],"ResNet101,":[113],"EfficientNet-B0,":[114],"InceptionResNetV2,":[116],"under":[117,163],"a":[118],"unified":[119],"training":[120],"protocol":[121],"five-fold":[123],"stratified":[124],"cross-validation.":[125],"Experimental":[126],"results":[127],"demonstrate":[128],"that":[129],"consistently":[133],"achieves":[134],"higher":[135],"accuracy":[137],"macro-F1":[139],"scores":[140],"than":[141],"baselines":[143],"across":[144,157],"diverse":[145],"representations.":[147],"In":[148],"addition,":[149],"hierarchical":[151],"mechanism":[153],"provides":[154],"improved":[155],"stability":[156],"data":[158],"partitions":[159],"enhanced":[161],"robustness":[162],"resolution":[165],"variability.":[166],"These":[167],"findings":[168],"highlight":[169],"effectiveness":[171],"transformer-based":[173],"architectures":[174],"complex":[177],"structures":[179],"in":[180],"signals":[183],"provide":[185],"insights":[186],"into":[187],"behavior":[190],"models":[194]},"counts_by_year":[],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2026-04-02T00:00:00"}
