{"id":"https://openalex.org/W4416252251","doi":"https://doi.org/10.1109/ijcnn64981.2025.11228602","title":"Mamba-based Layer-wise Progressive Fusion Network with Depthwise Enhancement for Low-resource Speech Recognition","display_name":"Mamba-based Layer-wise Progressive Fusion Network with Depthwise Enhancement for Low-resource Speech Recognition","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4416252251","doi":"https://doi.org/10.1109/ijcnn64981.2025.11228602"},"language":null,"primary_location":{"id":"doi:10.1109/ijcnn64981.2025.11228602","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11228602","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5046748258","display_name":"Xuanda Chen","orcid":"https://orcid.org/0000-0003-4330-1033"},"institutions":[{"id":"https://openalex.org/I4210105229","display_name":"City University of Hong Kong, Shenzhen Research Institute","ror":"https://ror.org/00xc0ma20","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210105229"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xuanda Chen","raw_affiliation_strings":["Shenzhen Research Institute of Shandong University,Shenzhen,China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Research Institute of Shandong University,Shenzhen,China","institution_ids":["https://openalex.org/I4210105229"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111149111","display_name":"Dingxin Cheng","orcid":null},"institutions":[{"id":"https://openalex.org/I4210105229","display_name":"City University of Hong Kong, Shenzhen Research Institute","ror":"https://ror.org/00xc0ma20","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210105229"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dingxin Cheng","raw_affiliation_strings":["Shenzhen Research Institute of Shandong University,Shenzhen,China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Research Institute of Shandong University,Shenzhen,China","institution_ids":["https://openalex.org/I4210105229"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101672546","display_name":"Fei Hou","orcid":"https://orcid.org/0000-0002-7077-463X"},"institutions":[{"id":"https://openalex.org/I4210158823","display_name":"Weihai Science and Technology Bureau","ror":"https://ror.org/05mx4xx46","country_code":"CN","type":"government","lineage":["https://openalex.org/I4210158823"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fei Hou","raw_affiliation_strings":["Wendeng Branch of Weihai Public Security Bureau,Weihai,China"],"affiliations":[{"raw_affiliation_string":"Wendeng Branch of Weihai Public Security Bureau,Weihai,China","institution_ids":["https://openalex.org/I4210158823"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100735305","display_name":"Bin Jiang","orcid":"https://orcid.org/0000-0001-8438-0001"},"institutions":[{"id":"https://openalex.org/I4210105229","display_name":"City University of Hong Kong, Shenzhen Research Institute","ror":"https://ror.org/00xc0ma20","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210105229"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bin Jiang","raw_affiliation_strings":["Shenzhen Research Institute of Shandong University,Shenzhen,China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Research Institute of Shandong University,Shenzhen,China","institution_ids":["https://openalex.org/I4210105229"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071219261","display_name":"Xuchen Li","orcid":"https://orcid.org/0000-0003-1212-8389"},"institutions":[{"id":"https://openalex.org/I4210105229","display_name":"City University of Hong Kong, Shenzhen Research Institute","ror":"https://ror.org/00xc0ma20","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210105229"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuchen Li","raw_affiliation_strings":["Shenzhen Research Institute of Shandong University,Shenzhen,China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Research Institute of Shandong University,Shenzhen,China","institution_ids":["https://openalex.org/I4210105229"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100338152","display_name":"Wenyu Wang","orcid":"https://orcid.org/0000-0001-9821-3220"},"institutions":[{"id":"https://openalex.org/I4210105229","display_name":"City University of Hong Kong, Shenzhen Research Institute","ror":"https://ror.org/00xc0ma20","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210105229"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenyu Wang","raw_affiliation_strings":["Shenzhen Research Institute of Shandong University,Shenzhen,China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Research Institute of Shandong University,Shenzhen,China","institution_ids":["https://openalex.org/I4210105229"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102817271","display_name":"Meixia Qu","orcid":"https://orcid.org/0000-0001-7607-8195"},"institutions":[{"id":"https://openalex.org/I4210105229","display_name":"City University of Hong Kong, Shenzhen Research Institute","ror":"https://ror.org/00xc0ma20","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210105229"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Meixia Qu","raw_affiliation_strings":["Shenzhen Research Institute of Shandong University,Shenzhen,China"],"affiliations":[{"raw_affiliation_string":"Shenzhen Research Institute of Shandong University,Shenzhen,China","institution_ids":["https://openalex.org/I4210105229"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5046748258"],"corresponding_institution_ids":["https://openalex.org/I4210105229"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.18328643,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.8603000044822693,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.8603000044822693,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.06440000236034393,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.007600000128149986,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/fusion-mechanism","display_name":"Fusion mechanism","score":0.6301000118255615},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.6139000058174133},{"id":"https://openalex.org/keywords/adaptability","display_name":"Adaptability","score":0.583899974822998},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5401999950408936},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.5392000079154968},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5360000133514404},{"id":"https://openalex.org/keywords/mechanism","display_name":"Mechanism (biology)","score":0.4530999958515167}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7576000094413757},{"id":"https://openalex.org/C173414695","wikidata":"https://www.wikidata.org/wiki/Q5510276","display_name":"Fusion mechanism","level":4,"score":0.6301000118255615},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.6139000058174133},{"id":"https://openalex.org/C177606310","wikidata":"https://www.wikidata.org/wiki/Q5674297","display_name":"Adaptability","level":2,"score":0.583899974822998},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5820000171661377},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5401999950408936},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.5392000079154968},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5385000109672546},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5360000133514404},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.4530999958515167},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3831000030040741},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.34929999709129333},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.328000009059906},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.3230000138282776},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.3075000047683716},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.29120001196861267},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.29109999537467957},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2669999897480011}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn64981.2025.11228602","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11228602","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320329791","display_name":"Shenzhen Fundamental Research Program","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W2515737026","https://openalex.org/W2936774411","https://openalex.org/W2963403664","https://openalex.org/W3037032032","https://openalex.org/W3094833745","https://openalex.org/W3095173472","https://openalex.org/W3096798607","https://openalex.org/W3097345069","https://openalex.org/W3097777922","https://openalex.org/W3197771105","https://openalex.org/W4297798704","https://openalex.org/W4385245566","https://openalex.org/W4385572536","https://openalex.org/W4385823255","https://openalex.org/W4392909513","https://openalex.org/W4402112499","https://openalex.org/W4403576833","https://openalex.org/W4405059050","https://openalex.org/W4406461253","https://openalex.org/W4408352482","https://openalex.org/W4408352636"],"related_works":[],"abstract_inverted_index":{"Existing":[0],"speech":[1,19,153],"recognition":[2],"models":[3],"struggle":[4],"to":[5,71,121,140],"capture":[6],"deep":[7],"implicit":[8],"representations":[9,113],"and":[10,48,83,95,160],"preserve":[11],"key":[12],"information":[13,84],"during":[14],"propagation":[15],"when":[16],"handling":[17],"low-resource":[18,22,29,122,129,152],"data.":[20],"Furthermore,":[21],"dialect":[23,131,163],"datasets":[24,159],"are":[25],"even":[26],"rarer":[27],"than":[28],"language":[30],"datasets.":[31],"To":[32],"address":[33],"these":[34],"issues,":[35],"we":[36],"propose":[37],"a":[38,66,127],"model":[39],"primarily":[40],"consisting":[41],"of":[42,137,145,168],"layer-wise":[43,60],"progressive":[44,61],"fusion":[45,62,69],"mamba":[46,81],"(LPFMamba)":[47],"the":[49,59,79,87,100,142,161,166],"concatenation-depthwise":[50],"enhancement":[51],"module":[52,63],"(CDEM),":[53],"named":[54],"LPFMamba-CDEM.":[55],"At":[56],"its":[57],"core,":[58],"(LPFM)":[64],"employs":[65],"hierarchical":[67],"selective":[68],"mechanism":[70,92],"integrate":[72],"local":[73],"features,":[74],"global":[75],"features":[76],"extracted":[77],"by":[78],"bidirectional":[80],"module,":[82],"propagated":[85],"from":[86],"preceding":[88],"LPFM":[89],"layer.":[90],"This":[91],"progressively":[93],"accumulates":[94],"propagates":[96],"effective":[97],"representations,":[98],"enhancing":[99],"model\u2019s":[101],"capacity":[102],"under":[103],"limited":[104],"data":[105],"conditions.":[106],"The":[107],"CDEM":[108],"further":[109],"enhances":[110],"high-level":[111],"feature":[112],"processed":[114],"through":[115],"multiple":[116,151],"encoder":[117],"layers,":[118],"increasing":[119],"adaptability":[120],"speech.":[123],"We":[124],"also":[125],"introduce":[126],"self-built":[128],"Jilu":[130,162],"dataset":[132],"with":[133],"approximately":[134],"34":[135],"hours":[136],"speech,":[138],"aiming":[139],"promote":[141],"equitable":[143],"dissemination":[144],"technology.":[146],"Extensive":[147],"experiments":[148],"conducted":[149],"on":[150],"datasets,":[154],"including":[155],"both":[156],"publicly":[157],"available":[158],"dataset,":[164],"demonstrate":[165],"effectiveness":[167],"our":[169],"approach.":[170]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-11-14T00:00:00"}
