{"id":"https://openalex.org/W4404787821","doi":"https://doi.org/10.1109/taslp.2024.3507566","title":"Blind Audio Bandwidth Extension: A Diffusion-Based Zero-Shot Approach","display_name":"Blind Audio Bandwidth Extension: A Diffusion-Based Zero-Shot Approach","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4404787821","doi":"https://doi.org/10.1109/taslp.2024.3507566"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2024.3507566","is_oa":true,"landing_page_url":"https://doi.org/10.1109/taslp.2024.3507566","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1109/taslp.2024.3507566","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030929567","display_name":"Eloi Moliner","orcid":"https://orcid.org/0000-0001-5719-326X"},"institutions":[{"id":"https://openalex.org/I9927081","display_name":"Aalto University","ror":"https://ror.org/020hwjq30","country_code":"FI","type":"education","lineage":["https://openalex.org/I9927081"]}],"countries":["FI"],"is_corresponding":true,"raw_author_name":"Eloi Moliner","raw_affiliation_strings":["Acoustics Laboratory, Department of Information and Communications Engineering, Aalto University, Espoo, Finland"],"raw_orcid":"https://orcid.org/0000-0001-5719-326X","affiliations":[{"raw_affiliation_string":"Acoustics Laboratory, Department of Information and Communications Engineering, Aalto University, Espoo, Finland","institution_ids":["https://openalex.org/I9927081"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013286528","display_name":"Filip Elvander","orcid":"https://orcid.org/0000-0003-1857-2173"},"institutions":[{"id":"https://openalex.org/I9927081","display_name":"Aalto University","ror":"https://ror.org/020hwjq30","country_code":"FI","type":"education","lineage":["https://openalex.org/I9927081"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Filip Elvander","raw_affiliation_strings":["Acoustics Laboratory, Department of Information and Communications Engineering, Aalto University, Espoo, Finland"],"raw_orcid":"https://orcid.org/0000-0003-1857-2173","affiliations":[{"raw_affiliation_string":"Acoustics Laboratory, Department of Information and Communications Engineering, Aalto University, Espoo, Finland","institution_ids":["https://openalex.org/I9927081"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5021963945","display_name":"Vesa V\u00e4lim\u00e4ki","orcid":"https://orcid.org/0000-0002-7869-292X"},"institutions":[{"id":"https://openalex.org/I9927081","display_name":"Aalto University","ror":"https://ror.org/020hwjq30","country_code":"FI","type":"education","lineage":["https://openalex.org/I9927081"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Vesa V\u00e4lim\u00e4ki","raw_affiliation_strings":["Acoustics Laboratory, Department of Information and Communications Engineering, Aalto University, Espoo, Finland"],"raw_orcid":"https://orcid.org/0000-0002-7869-292X","affiliations":[{"raw_affiliation_string":"Acoustics Laboratory, Department of Information and Communications Engineering, Aalto University, Espoo, Finland","institution_ids":["https://openalex.org/I9927081"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5030929567"],"corresponding_institution_ids":["https://openalex.org/I9927081"],"apc_list":null,"apc_paid":null,"fwci":2.5413,"has_fulltext":true,"cited_by_count":8,"citation_normalized_percentile":{"value":0.90827052,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"32","issue":null,"first_page":"5092","last_page":"5105"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10688","display_name":"Image and Signal Denoising Methods","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.9948999881744385,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bandwidth-extension","display_name":"Bandwidth extension","score":0.670138955116272},{"id":"https://openalex.org/keywords/extension","display_name":"Extension (predicate logic)","score":0.6446678042411804},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.5299844741821289},{"id":"https://openalex.org/keywords/zero","display_name":"Zero (linguistics)","score":0.4956444501876831},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.4651748836040497},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.46099236607551575},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.30431944131851196},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.2228284776210785},{"id":"https://openalex.org/keywords/audio-signal","display_name":"Audio signal","score":0.20256146788597107},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.17833486199378967},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.09562888741493225},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.071692556142807},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.06221628189086914},{"id":"https://openalex.org/keywords/quantum-mechanics","display_name":"Quantum mechanics","score":0.054504334926605225}],"concepts":[{"id":"https://openalex.org/C9387945","wikidata":"https://www.wikidata.org/wiki/Q4854770","display_name":"Bandwidth extension","level":4,"score":0.670138955116272},{"id":"https://openalex.org/C2778029271","wikidata":"https://www.wikidata.org/wiki/Q5421931","display_name":"Extension (predicate logic)","level":2,"score":0.6446678042411804},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.5299844741821289},{"id":"https://openalex.org/C2780813799","wikidata":"https://www.wikidata.org/wiki/Q3274237","display_name":"Zero (linguistics)","level":2,"score":0.4956444501876831},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.4651748836040497},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.46099236607551575},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.30431944131851196},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.2228284776210785},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.20256146788597107},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.17833486199378967},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.09562888741493225},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.071692556142807},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.06221628189086914},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.054504334926605225},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/taslp.2024.3507566","is_oa":true,"landing_page_url":"https://doi.org/10.1109/taslp.2024.3507566","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},{"id":"pmh:oai:lirias2repo.kuleuven.be:20.500.12942/766234","is_oa":true,"landing_page_url":"https://lirias.kuleuven.be/handle/20.500.12942/766234","pdf_url":"https://lirias.kuleuven.be/retrieve/6a6fde55-9468-4f28-9bfa-e17495b5d91c","source":{"id":"https://openalex.org/S4306401954","display_name":"Lirias (KU Leuven)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I99464096","host_organization_name":"KU Leuven","host_organization_lineage":["https://openalex.org/I99464096"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Ieee-Acm Transactions On Audio Speech And Language Processing, vol. 32, (5092-5105)","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:aaltodoc.aalto.fi:123456789/132386","is_oa":true,"landing_page_url":"https://research.aalto.fi/en/publications/3317fe5e-6112-4799-afa0-0c4fcabc861b","pdf_url":null,"source":{"id":"https://openalex.org/S4306401662","display_name":"Aaltodoc (Aalto University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I9927081","host_organization_name":"Aalto University","host_organization_lineage":["https://openalex.org/I9927081"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"publishedVersion"}],"best_oa_location":{"id":"doi:10.1109/taslp.2024.3507566","is_oa":true,"landing_page_url":"https://doi.org/10.1109/taslp.2024.3507566","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":72,"referenced_works":["https://openalex.org/W1505878979","https://openalex.org/W1517841224","https://openalex.org/W1522137499","https://openalex.org/W1994997782","https://openalex.org/W2098990405","https://openalex.org/W2100285470","https://openalex.org/W2147152002","https://openalex.org/W2167341025","https://openalex.org/W2171815504","https://openalex.org/W2400346108","https://openalex.org/W2740337320","https://openalex.org/W2929274168","https://openalex.org/W2998639482","https://openalex.org/W3004970274","https://openalex.org/W3006440693","https://openalex.org/W3094550259","https://openalex.org/W3102195007","https://openalex.org/W3115185819","https://openalex.org/W3125868443","https://openalex.org/W3160652646","https://openalex.org/W3163243746","https://openalex.org/W3191805365","https://openalex.org/W3197990672","https://openalex.org/W4221155904","https://openalex.org/W4224133928","https://openalex.org/W4225288502","https://openalex.org/W4238906441","https://openalex.org/W4249994567","https://openalex.org/W4281820413","https://openalex.org/W4283215837","https://openalex.org/W4285412734","https://openalex.org/W4287510253","https://openalex.org/W4298182583","https://openalex.org/W4302095631","https://openalex.org/W4312293341","https://openalex.org/W4313142698","https://openalex.org/W4319989813","https://openalex.org/W4321177576","https://openalex.org/W4372260387","https://openalex.org/W4372263438","https://openalex.org/W4372268522","https://openalex.org/W4372341094","https://openalex.org/W4372341905","https://openalex.org/W4375869120","https://openalex.org/W4375869358","https://openalex.org/W4380434618","https://openalex.org/W4384080510","https://openalex.org/W4386066264","https://openalex.org/W4392562815","https://openalex.org/W4402671601","https://openalex.org/W6629271464","https://openalex.org/W6675033622","https://openalex.org/W6680067488","https://openalex.org/W6755182157","https://openalex.org/W6765775151","https://openalex.org/W6779823529","https://openalex.org/W6786375611","https://openalex.org/W6809940947","https://openalex.org/W6810708459","https://openalex.org/W6810793953","https://openalex.org/W6838452192","https://openalex.org/W6838766441","https://openalex.org/W6840155194","https://openalex.org/W6844035133","https://openalex.org/W6844364769","https://openalex.org/W6849109464","https://openalex.org/W6849635556","https://openalex.org/W6849909072","https://openalex.org/W6850106270","https://openalex.org/W6851069163","https://openalex.org/W6852245216","https://openalex.org/W6863210012"],"related_works":["https://openalex.org/W2059847929","https://openalex.org/W4319862422","https://openalex.org/W1971598839","https://openalex.org/W2133828239","https://openalex.org/W3207737697","https://openalex.org/W2017964352","https://openalex.org/W3144767625","https://openalex.org/W2097303478","https://openalex.org/W2900695998","https://openalex.org/W2056058148"],"abstract_inverted_index":{"Audio":[0,42],"bandwidth":[1,110],"extension":[2,111],"involves":[3],"the":[4,16,47,55,65,78,91,102,139,147,158,171,177],"realistic":[5],"reconstruction":[6],"of":[7,58,73,90,161,166],"high-frequency":[8,141],"spectra":[9],"from":[10],"bandlimited":[11],"observations.":[12],"In":[13],"cases":[14],"where":[15,77],"lowpass":[17],"degradation":[18,79],"is":[19,81,94],"unknown,":[20],"such":[21],"as":[22],"in":[23,50],"restoring":[24],"historical":[25,135,162,167],"audio":[26,159],"recordings,":[27,136],"this":[28],"becomes":[29],"a":[30,36,51,59,70],"blind":[31,48,109],"problem.":[32],"This":[33],"paper":[34],"introduces":[35],"novel":[37],"method":[38,93,173],"called":[39],"BABE":[40,68,106,127,155],"(Blind":[41],"Bandwidth":[43],"Extension)":[44],"that":[45,105,154],"addresses":[46],"problem":[49],"zero-shot":[52],"setting,":[53],"leveraging":[54],"generative":[56],"priors":[57],"pre-trained":[60],"unconditional":[61],"diffusion":[62,74],"model.":[63],"During":[64],"inference":[66],"process,":[67],"utilizes":[69],"generalized":[71],"version":[72],"posterior":[75],"sampling,":[76],"operator":[80],"unknown":[82],"but":[83],"parametrized":[84],"and":[85,98,101,113],"inferred":[86],"iteratively.":[87],"The":[88],"performance":[89,116],"proposed":[92,172],"evaluated":[95],"using":[96],"objective":[97],"subjective":[99],"metrics,":[100],"results":[103],"show":[104],"surpasses":[107],"state-of-the-art":[108],"baselines":[112],"achieves":[114],"competitive":[115],"compared":[117],"to":[118],"informed":[119],"methods":[120],"when":[121,132],"tested":[122],"with":[123,146,170],"synthetic":[124],"data.":[125],"Moreover,":[126],"exhibits":[128],"robust":[129],"generalization":[130],"capabilities":[131],"enhancing":[133],"real":[134],"effectively":[137],"reconstructing":[138],"missing":[140],"content":[142],"while":[143],"maintaining":[144],"coherence":[145],"original":[148],"recording.":[149],"Subjective":[150],"preference":[151],"tests":[152],"confirm":[153],"significantly":[156],"improves":[157],"quality":[160],"music":[163],"recordings.":[164],"Examples":[165],"recordings":[168],"restored":[169],"are":[174],"available":[175],"on":[176],"companion":[178],"webpage:":[179],"<uri":[180],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[181],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">http://research.spa.aalto.fi/publications/papers/ieee-taslp-babe/</uri>":[182]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":1}],"updated_date":"2026-05-15T08:27:34.491423","created_date":"2024-11-28T00:00:00"}
