{"id":"https://openalex.org/W4416251150","doi":"https://doi.org/10.1109/ijcnn64981.2025.11229104","title":"Lightweight Transformer with Enhanced Inverted Residual Blocks for Bird Sound Recognition","display_name":"Lightweight Transformer with Enhanced Inverted Residual Blocks for Bird Sound Recognition","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4416251150","doi":"https://doi.org/10.1109/ijcnn64981.2025.11229104"},"language":null,"primary_location":{"id":"doi:10.1109/ijcnn64981.2025.11229104","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11229104","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103528545","display_name":"Xiangyu Cheng","orcid":"https://orcid.org/0009-0002-3450-0778"},"institutions":[{"id":"https://openalex.org/I111599522","display_name":"Jiangnan University","ror":"https://ror.org/04mkzax54","country_code":"CN","type":"education","lineage":["https://openalex.org/I111599522"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xiangyu Cheng","raw_affiliation_strings":["Jiangnan University,Department of Artificial Intelligence and Computer Science,Wuxi,China"],"affiliations":[{"raw_affiliation_string":"Jiangnan University,Department of Artificial Intelligence and Computer Science,Wuxi,China","institution_ids":["https://openalex.org/I111599522"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107809751","display_name":"Shengnan Fan","orcid":null},"institutions":[{"id":"https://openalex.org/I111599522","display_name":"Jiangnan University","ror":"https://ror.org/04mkzax54","country_code":"CN","type":"education","lineage":["https://openalex.org/I111599522"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shengnan Fan","raw_affiliation_strings":["Jiangnan University,Department of Artificial Intelligence and Computer Science,Wuxi,China"],"affiliations":[{"raw_affiliation_string":"Jiangnan University,Department of Artificial Intelligence and Computer Science,Wuxi,China","institution_ids":["https://openalex.org/I111599522"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027030834","display_name":"Jianyang Ding","orcid":"https://orcid.org/0000-0001-8582-150X"},"institutions":[{"id":"https://openalex.org/I111599522","display_name":"Jiangnan University","ror":"https://ror.org/04mkzax54","country_code":"CN","type":"education","lineage":["https://openalex.org/I111599522"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianyang Ding","raw_affiliation_strings":["Jiangnan University,Department of Artificial Intelligence and Computer Science,Wuxi,China"],"affiliations":[{"raw_affiliation_string":"Jiangnan University,Department of Artificial Intelligence and Computer Science,Wuxi,China","institution_ids":["https://openalex.org/I111599522"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046911338","display_name":"Qin Wu","orcid":"https://orcid.org/0000-0001-6350-6672"},"institutions":[{"id":"https://openalex.org/I111599522","display_name":"Jiangnan University","ror":"https://ror.org/04mkzax54","country_code":"CN","type":"education","lineage":["https://openalex.org/I111599522"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qin Wu","raw_affiliation_strings":["Jiangnan University,Department of Artificial Intelligence and Computer Science,Wuxi,China"],"affiliations":[{"raw_affiliation_string":"Jiangnan University,Department of Artificial Intelligence and Computer Science,Wuxi,China","institution_ids":["https://openalex.org/I111599522"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101995085","display_name":"Long Cheng","orcid":"https://orcid.org/0009-0004-4819-5406"},"institutions":[{"id":"https://openalex.org/I111599522","display_name":"Jiangnan University","ror":"https://ror.org/04mkzax54","country_code":"CN","type":"education","lineage":["https://openalex.org/I111599522"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Long Cheng","raw_affiliation_strings":["Jiangnan University,Department of Artificial Intelligence and Computer Science,Wuxi,China"],"affiliations":[{"raw_affiliation_string":"Jiangnan University,Department of Artificial Intelligence and Computer Science,Wuxi,China","institution_ids":["https://openalex.org/I111599522"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5024443501","display_name":"Zhilei Chai","orcid":"https://orcid.org/0000-0003-3822-1653"},"institutions":[{"id":"https://openalex.org/I111599522","display_name":"Jiangnan University","ror":"https://ror.org/04mkzax54","country_code":"CN","type":"education","lineage":["https://openalex.org/I111599522"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhilei Chai","raw_affiliation_strings":["Jiangnan University,Department of Artificial Intelligence and Computer Science,Wuxi,China"],"affiliations":[{"raw_affiliation_string":"Jiangnan University,Department of Artificial Intelligence and Computer Science,Wuxi,China","institution_ids":["https://openalex.org/I111599522"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5103528545"],"corresponding_institution_ids":["https://openalex.org/I111599522"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.33274258,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11665","display_name":"Animal Vocal Communication and Behavior","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1309","display_name":"Developmental Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11665","display_name":"Animal Vocal Communication and Behavior","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1309","display_name":"Developmental Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10659","display_name":"Marine animal studies overview","score":0.0006000000284984708,"subfield":{"id":"https://openalex.org/subfields/2303","display_name":"Ecology"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10089","display_name":"Avian ecology and behavior","score":0.0003000000142492354,"subfield":{"id":"https://openalex.org/subfields/2303","display_name":"Ecology"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/residual","display_name":"Residual","score":0.7296000123023987},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.6556000113487244},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5738999843597412},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5691999793052673},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.5249000191688538},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.39489999413490295},{"id":"https://openalex.org/keywords/multilayer-perceptron","display_name":"Multilayer perceptron","score":0.38769999146461487},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3662000000476837}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7414000034332275},{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.7296000123023987},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.6556000113487244},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5738999843597412},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5691999793052673},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.5249000191688538},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4715000092983246},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.39489999413490295},{"id":"https://openalex.org/C179717631","wikidata":"https://www.wikidata.org/wiki/Q2991667","display_name":"Multilayer perceptron","level":3,"score":0.38769999146461487},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3662000000476837},{"id":"https://openalex.org/C48677424","wikidata":"https://www.wikidata.org/wiki/Q6888088","display_name":"Mode (computer interface)","level":2,"score":0.3456999957561493},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3334999978542328},{"id":"https://openalex.org/C60908668","wikidata":"https://www.wikidata.org/wiki/Q690207","display_name":"Perceptron","level":3,"score":0.3138999938964844},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.31040000915527344},{"id":"https://openalex.org/C138236772","wikidata":"https://www.wikidata.org/wiki/Q25098575","display_name":"Edge device","level":3,"score":0.30799999833106995},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.27649998664855957},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2689000070095062},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.26339998841285706},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.2547000050544739},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.25040000677108765}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn64981.2025.11229104","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11229104","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W2052666245","https://openalex.org/W2194775991","https://openalex.org/W2593116425","https://openalex.org/W2963125010","https://openalex.org/W2963163009","https://openalex.org/W2963351448","https://openalex.org/W2982083293","https://openalex.org/W3094550259","https://openalex.org/W3138516171","https://openalex.org/W3174563152","https://openalex.org/W3176946833","https://openalex.org/W3196974791","https://openalex.org/W3205743929","https://openalex.org/W4226426325","https://openalex.org/W4226442948","https://openalex.org/W4283837884","https://openalex.org/W4296106947","https://openalex.org/W4312820606","https://openalex.org/W4317470305","https://openalex.org/W4386083031","https://openalex.org/W4387101372","https://openalex.org/W4390872670","https://openalex.org/W4392908114","https://openalex.org/W4402667890"],"related_works":[],"abstract_inverted_index":{"Bird":[0],"sound":[1],"recognition":[2],"relies":[3],"on":[4,53,160,192],"capturing":[5],"unique":[6],"characteristics":[7],"of":[8,42,128],"avian":[9],"vocalizations":[10],"to":[11,32,36,100,116],"achieve":[12,172],"accurate":[13],"species":[14],"identification.":[15],"Recent":[16],"advances":[17],"in":[18,56,136,144,177,182,189],"deep":[19],"learning":[20],"have":[21],"significantly":[22],"improved":[23],"classification":[24],"accuracy":[25],"and":[26,112,156,164,185],"Transformer-based":[27,68],"models":[28],"stand":[29],"out":[30],"due":[31],"their":[33],"superior":[34],"ability":[35],"model":[37,69,118,170],"long-range":[38],"dependencies.":[39],"However,":[40],"most":[41],"them":[43],"suffer":[44],"from":[45],"high":[46,198],"computational":[47,183],"complexity,":[48],"posing":[49],"challenges":[50],"for":[51,96],"deployment":[52],"edge":[54],"devices":[55],"the":[57,104,129,137,145,161],"wild.":[58],"To":[59,76],"address":[60],"this":[61,63,77],"issue,":[62],"paper":[64],"proposes":[65],"a":[66,92,174,186],"lightweight":[67],"incorporating":[70],"enhanced":[71],"Inverted":[72],"Residual":[73],"Blocks":[74],"(IRB).":[75],"end,":[78],"we":[79,90,107,121],"first":[80],"replace":[81],"original":[82],"Multilayer":[83],"Perceptron":[84],"(MLP)":[85],"modules":[86],"with":[87],"IRB.":[88],"Additionally,":[89],"propose":[91],"stage-wise":[93],"incremental":[94],"strategy":[95],"setting":[97],"expansion":[98],"factors":[99],"reduce":[101],"redundancy.":[102],"At":[103],"same":[105],"time,":[106],"incorporate":[108],"residual":[109],"connections":[110],"before":[111],"after":[113],"depthwise":[114],"convolutions":[115],"maintain":[117],"performance.":[119],"Furthermore,":[120],"refine":[122],"attention":[123,135],"configurations":[124],"throughout":[125],"various":[126],"phases":[127],"network.":[130],"We":[131],"strategically":[132],"minimize":[133],"redundant":[134],"initial":[138],"stages,":[139],"while":[140,196],"intensifying":[141],"its":[142],"application":[143],"crucial":[146],"stages.":[147],"This":[148],"approach":[149],"achieves":[150],"an":[151,179],"optimal":[152],"balance":[153],"between":[154],"computation":[155],"accuracy.":[157,199],"Experimental":[158],"results":[159],"Bird-CLEF2023,":[162],"DCASE2020,":[163],"Birdsdata":[165],"demonstrate":[166],"that":[167],"our":[168],"proposed":[169],"can":[171],"approximately":[173],"5-fold":[175],"reduction":[176],"parameters,":[178],"85%":[180],"decrease":[181],"load,":[184],"2.7-fold":[187],"increase":[188],"inference":[190],"speed":[191],"Jetson":[193],"AGX":[194],"Xavier,":[195],"preserving":[197]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-14T00:00:00"}
