{"id":"https://openalex.org/W4402402364","doi":"https://doi.org/10.1109/ialp63756.2024.10661187","title":"MambaGAN: Mamba based Metric GAN for Monaural Speech Enhancement","display_name":"MambaGAN: Mamba based Metric GAN for Monaural Speech Enhancement","publication_year":2024,"publication_date":"2024-08-04","ids":{"openalex":"https://openalex.org/W4402402364","doi":"https://doi.org/10.1109/ialp63756.2024.10661187"},"language":"en","primary_location":{"id":"doi:10.1109/ialp63756.2024.10661187","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ialp63756.2024.10661187","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 International Conference on Asian Language Processing (IALP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5108983003","display_name":"Tianhao Luo","orcid":null},"institutions":[{"id":"https://openalex.org/I151727225","display_name":"Harbin Engineering University","ror":"https://ror.org/03x80pn82","country_code":"CN","type":"education","lineage":["https://openalex.org/I151727225"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Tianhao Luo","raw_affiliation_strings":["Harbin Engineering University,National Key Laboratory of Underwater Acoustic Technology,Harbin,China,15001"],"affiliations":[{"raw_affiliation_string":"Harbin Engineering University,National Key Laboratory of Underwater Acoustic Technology,Harbin,China,15001","institution_ids":["https://openalex.org/I151727225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080509511","display_name":"Feng Zhou","orcid":"https://orcid.org/0000-0002-1514-7393"},"institutions":[{"id":"https://openalex.org/I151727225","display_name":"Harbin Engineering University","ror":"https://ror.org/03x80pn82","country_code":"CN","type":"education","lineage":["https://openalex.org/I151727225"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Feng Zhou","raw_affiliation_strings":["Harbin Engineering University,National Key Laboratory of Underwater Acoustic Technology,Harbin,China,15001"],"affiliations":[{"raw_affiliation_string":"Harbin Engineering University,National Key Laboratory of Underwater Acoustic Technology,Harbin,China,15001","institution_ids":["https://openalex.org/I151727225"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5041350492","display_name":"Zhongxin Bai","orcid":"https://orcid.org/0000-0003-3575-7014"},"institutions":[{"id":"https://openalex.org/I151727225","display_name":"Harbin Engineering University","ror":"https://ror.org/03x80pn82","country_code":"CN","type":"education","lineage":["https://openalex.org/I151727225"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhongxin Bai","raw_affiliation_strings":["Harbin Engineering University,National Key Laboratory of Underwater Acoustic Technology,Harbin,China,15001"],"affiliations":[{"raw_affiliation_string":"Harbin Engineering University,National Key Laboratory of Underwater Acoustic Technology,Harbin,China,15001","institution_ids":["https://openalex.org/I151727225"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5108983003"],"corresponding_institution_ids":["https://openalex.org/I151727225"],"apc_list":null,"apc_paid":null,"fwci":0.7787,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.70312673,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"411","last_page":"416"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13289","display_name":"Infant Health and Development","score":0.9950000047683716,"subfield":{"id":"https://openalex.org/subfields/3611","display_name":"Pharmacy"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.6735321283340454},{"id":"https://openalex.org/keywords/monaural","display_name":"Monaural","score":0.49545037746429443},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.49220767617225647},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.39695680141448975},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.10671675205230713}],"concepts":[{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.6735321283340454},{"id":"https://openalex.org/C102894143","wikidata":"https://www.wikidata.org/wiki/Q1323979","display_name":"Monaural","level":2,"score":0.49545037746429443},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.49220767617225647},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.39695680141448975},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10671675205230713},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ialp63756.2024.10661187","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ialp63756.2024.10661187","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 International Conference on Asian Language Processing (IALP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320337504","display_name":"Research and Development","ror":"https://ror.org/027s68j25"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W1552314771","https://openalex.org/W2067295501","https://openalex.org/W2094721231","https://openalex.org/W2109349638","https://openalex.org/W2144404214","https://openalex.org/W2149535104","https://openalex.org/W2889597349","https://openalex.org/W2937484199","https://openalex.org/W2962866211","https://openalex.org/W2963446712","https://openalex.org/W3032514799","https://openalex.org/W3064840847","https://openalex.org/W3096408984","https://openalex.org/W3158779859","https://openalex.org/W3160071434","https://openalex.org/W3161950572","https://openalex.org/W3191448984","https://openalex.org/W3197284240","https://openalex.org/W3206809722","https://openalex.org/W4232282348","https://openalex.org/W4245919820","https://openalex.org/W4285504888","https://openalex.org/W4289242435","https://openalex.org/W4296069347","https://openalex.org/W4296412923","https://openalex.org/W4313442864","https://openalex.org/W4372271367","https://openalex.org/W4385807442","https://openalex.org/W4385822340","https://openalex.org/W4386044615","https://openalex.org/W4389326242","https://openalex.org/W4395447416","https://openalex.org/W6756251360","https://openalex.org/W6762114000","https://openalex.org/W6782420349","https://openalex.org/W6843259600","https://openalex.org/W6855957182","https://openalex.org/W6859298233"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2036157531","https://openalex.org/W2056406069","https://openalex.org/W1974981856","https://openalex.org/W1518859147","https://openalex.org/W2045506488","https://openalex.org/W1983045063","https://openalex.org/W4321794819","https://openalex.org/W2401567014"],"abstract_inverted_index":{"Encoder-decoder":[0],"structures":[1],"are":[2],"widely":[3],"used":[4],"in":[5,64],"deep":[6],"neural":[7],"network-based":[8],"speech":[9,40,82],"enhancement":[10,41],"(SE),":[11],"often":[12,27],"utilizing":[13],"convolutional":[14,77],"and":[15,48],"transformer":[16,26,63],"modules":[17,78],"as":[18],"basic":[19],"components.":[20],"The":[21],"high":[22],"computational":[23,120],"demands":[24],"of":[25,100,115],"limit":[28],"their":[29],"real-time":[30],"performance.":[31],"To":[32],"address":[33],"this":[34],"issue,":[35],"we":[36],"propose":[37],"a":[38,51,56,110],"novel":[39],"network":[42],"called":[43],"MambaGAN":[44,94],"by":[45],"combining":[46],"MambaFormer":[47,54],"ODConv":[49],"within":[50],"GAN":[52],"framework.":[53],"is":[55,73],"structure":[57],"based":[58],"on":[59,88],"Mamba":[60],"to":[61,75],"replace":[62,76],"SE":[65],"networks.":[66],"Additionally,":[67],"an":[68,96],"Omni-dimensional":[69],"Dynamic":[70],"Convolution":[71],"(ODConv)":[72],"introduced":[74],"for":[79],"capturing":[80],"richer":[81],"features":[83],"more":[84],"flexibly.":[85],"Experimental":[86],"results":[87],"the":[89],"VoiceBank+DEMAND":[90],"dataset":[91],"show":[92],"that":[93],"achieved":[95,109],"impressive":[97],"PESQ":[98,113],"score":[99,114],"3.56.":[101],"When":[102],"combined":[103],"with":[104],"perceptual":[105],"contrast":[106],"stretching,":[107],"it":[108],"new":[111],"state-of-the-art":[112],"3.72,":[116],"while":[117],"exhibiting":[118],"lower":[119],"complexity":[121],"than":[122],"existing":[123],"conformer-based":[124],"models.":[125]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
