{"id":"https://openalex.org/W4408354697","doi":"https://doi.org/10.1109/icassp49660.2025.10890511","title":"Wave-U-Mamba: An End-To-End Framework For High-Quality And Efficient Speech Super Resolution","display_name":"Wave-U-Mamba: An End-To-End Framework For High-Quality And Efficient Speech Super Resolution","publication_year":2025,"publication_date":"2025-03-12","ids":{"openalex":"https://openalex.org/W4408354697","doi":"https://doi.org/10.1109/icassp49660.2025.10890511"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49660.2025.10890511","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10890511","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5075606753","display_name":"Yongjoon Lee","orcid":null},"institutions":[{"id":"https://openalex.org/I197347611","display_name":"Korea University","ror":"https://ror.org/047dqcg40","country_code":"KR","type":"education","lineage":["https://openalex.org/I197347611"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Yongjoon Lee","raw_affiliation_strings":["Korea University,Department of Statistics,Seoul,South Korea"],"affiliations":[{"raw_affiliation_string":"Korea University,Department of Statistics,Seoul,South Korea","institution_ids":["https://openalex.org/I197347611"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100684420","display_name":"Chan-Woo Kim","orcid":"https://orcid.org/0000-0002-7858-9885"},"institutions":[{"id":"https://openalex.org/I197347611","display_name":"Korea University","ror":"https://ror.org/047dqcg40","country_code":"KR","type":"education","lineage":["https://openalex.org/I197347611"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Chanwoo Kim","raw_affiliation_strings":["Korea University,Department of Artificial Intelligence,Seoul,South Korea"],"affiliations":[{"raw_affiliation_string":"Korea University,Department of Artificial Intelligence,Seoul,South Korea","institution_ids":["https://openalex.org/I197347611"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5075606753"],"corresponding_institution_ids":["https://openalex.org/I197347611"],"apc_list":null,"apc_paid":null,"fwci":1.561,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.7845805,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10901","display_name":"Advanced Data Compression Techniques","score":0.980400025844574,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9704999923706055,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.7750341892242432},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5474534630775452},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.42378517985343933},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.36743783950805664},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.144902765750885},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.12753203511238098}],"concepts":[{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.7750341892242432},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5474534630775452},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.42378517985343933},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.36743783950805664},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.144902765750885},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.12753203511238098},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49660.2025.10890511","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10890511","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W1901129140","https://openalex.org/W2535388113","https://openalex.org/W2962935966","https://openalex.org/W2963609956","https://openalex.org/W2964243274","https://openalex.org/W3015338123","https://openalex.org/W3015837673","https://openalex.org/W3160652646","https://openalex.org/W3197334236","https://openalex.org/W3197912330","https://openalex.org/W3197990672","https://openalex.org/W4221155904","https://openalex.org/W4283215837","https://openalex.org/W4392903177","https://openalex.org/W4403421706","https://openalex.org/W4404586723","https://openalex.org/W6695676441","https://openalex.org/W6741681139","https://openalex.org/W6751512325","https://openalex.org/W6757817989","https://openalex.org/W6767111847","https://openalex.org/W6772349387","https://openalex.org/W6778823374","https://openalex.org/W6780218876","https://openalex.org/W6780226713","https://openalex.org/W6783867762","https://openalex.org/W6784457260","https://openalex.org/W6785363610","https://openalex.org/W6793736971","https://openalex.org/W6797790494","https://openalex.org/W6810325043","https://openalex.org/W6859298233","https://openalex.org/W6860915106","https://openalex.org/W6861213978","https://openalex.org/W6861387779","https://openalex.org/W6863493712","https://openalex.org/W6866746970"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2151749779","https://openalex.org/W3179968364","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W4404782863"],"abstract_inverted_index":{"Speech":[0],"Super-Resolution":[1],"(SSR)":[2],"is":[3],"a":[4,24,63,148],"task":[5],"of":[6,158],"enhancing":[7],"low-resolution":[8,101],"speech":[9,29,139],"signals":[10],"by":[11,23,52],"restoring":[12],"missing":[13],"high-frequency":[14],"components.":[15],"Conventional":[16],"approaches":[17],"typically":[18],"reconstruct":[19],"log-mel":[20],"features,":[21],"followed":[22],"vocoder":[25],"that":[26,69,121],"generates":[27],"high-resolution":[28,138],"in":[30,44,73,160],"the":[31,48,94,161],"waveform":[32],"domain.":[33,75],"However,":[34],"as":[35,67,83],"mel":[36],"features":[37],"lack":[38],"phase":[39],"information,":[40],"this":[41],"can":[42],"result":[43],"performance":[45],"degradation":[46],"during":[47],"reconstruction":[49],"phase.":[50],"Motivated":[51],"recent":[53],"advances":[54],"with":[55,126,152],"Selective":[56],"State":[57],"Spaces":[58],"Models":[59],"(SSMs),":[60],"we":[61],"propose":[62],"method,":[64],"referred":[65],"to":[66,107],"Wave-U-Mamba":[68,89,132],"directly":[70],"performs":[71],"SSR":[72,125],"time":[74],"In":[76],"our":[77,122],"comparative":[78],"study,":[79],"including":[80],"models":[81,146],"such":[82],"WSRGlow,":[84],"NU-Wave":[85],"2,":[86],"and":[87,128],"AudioSR,":[88],"demonstrates":[90],"superior":[91],"performance,":[92],"achieving":[93],"lowest":[95],"Log-Spectral":[96],"Distance":[97],"(LSD)":[98],"across":[99],"various":[100],"sampling":[102],"rates,":[103],"ranging":[104],"from":[105],"8":[106],"24":[108],"kHz.":[109],"Additionally,":[110],"subjective":[111],"human":[112],"evaluations,":[113],"scored":[114],"using":[115],"Mean":[116],"Opinion":[117],"Score":[118],"(MOS)":[119],"reveal":[120],"method":[123],"produces":[124],"natural":[127],"human-like":[129],"quality.":[130],"Furthermore,":[131],"achieves":[133],"these":[134],"results":[135],"while":[136],"generating":[137],"over":[140],"nine":[141],"times":[142],"faster":[143],"than":[144,156],"baseline":[145,162],"on":[147],"single":[149],"A100":[150],"GPU,":[151],"parameter":[153],"sizes":[154],"less":[155],"2%":[157],"those":[159],"models.":[163]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-12-28T23:10:05.387466","created_date":"2025-10-10T00:00:00"}
