{"id":"https://openalex.org/W4392903291","doi":"https://doi.org/10.1109/icassp48485.2024.10446423","title":"VRDMG: Vocal Restoration via Diffusion Posterior Sampling with Multiple Guidance","display_name":"VRDMG: Vocal Restoration via Diffusion Posterior Sampling with Multiple Guidance","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392903291","doi":"https://doi.org/10.1109/icassp48485.2024.10446423"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10446423","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10446423","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048090174","display_name":"Carlos Hernandez-Olivan","orcid":"https://orcid.org/0000-0002-0235-2267"},"institutions":[{"id":"https://openalex.org/I255234318","display_name":"Universidad de Zaragoza","ror":"https://ror.org/012a91z28","country_code":"ES","type":"education","lineage":["https://openalex.org/I255234318"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Carlos Hernandez-Olivan","raw_affiliation_strings":["University of Zaragoza,Dept. of Electronic Engineering and Communications,Spain","Dept. of Electronic Engineering and Communications, University of Zaragoza, Spain"],"affiliations":[{"raw_affiliation_string":"University of Zaragoza,Dept. of Electronic Engineering and Communications,Spain","institution_ids":["https://openalex.org/I255234318"]},{"raw_affiliation_string":"Dept. of Electronic Engineering and Communications, University of Zaragoza, Spain","institution_ids":["https://openalex.org/I255234318"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102972958","display_name":"Koichi Saito","orcid":"https://orcid.org/0000-0002-8563-9262"},"institutions":[{"id":"https://openalex.org/I4210122684","display_name":"Sony Computer Science Laboratories","ror":"https://ror.org/02nc46417","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210122684"]},{"id":"https://openalex.org/I4210143797","display_name":"Sony (Japan)","ror":"https://ror.org/04wzv3n59","country_code":"JP","type":"company","lineage":["https://openalex.org/I4210143797"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Koichi Saito","raw_affiliation_strings":["Sony AI,Tokyo,Japan","Sony AI, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Sony AI,Tokyo,Japan","institution_ids":["https://openalex.org/I4210122684","https://openalex.org/I4210143797"]},{"raw_affiliation_string":"Sony AI, Tokyo, Japan","institution_ids":["https://openalex.org/I4210122684"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031225419","display_name":"Naoki Murata","orcid":"https://orcid.org/0000-0001-7418-5173"},"institutions":[{"id":"https://openalex.org/I4210143797","display_name":"Sony (Japan)","ror":"https://ror.org/04wzv3n59","country_code":"JP","type":"company","lineage":["https://openalex.org/I4210143797"]},{"id":"https://openalex.org/I4210122684","display_name":"Sony Computer Science Laboratories","ror":"https://ror.org/02nc46417","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210122684"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Naoki Murata","raw_affiliation_strings":["Sony AI,Tokyo,Japan","Sony AI, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Sony AI,Tokyo,Japan","institution_ids":["https://openalex.org/I4210122684","https://openalex.org/I4210143797"]},{"raw_affiliation_string":"Sony AI, Tokyo, Japan","institution_ids":["https://openalex.org/I4210122684"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044626752","display_name":"Chieh-Hsin Lai","orcid":"https://orcid.org/0009-0009-3059-929X"},"institutions":[{"id":"https://openalex.org/I4210122684","display_name":"Sony Computer Science Laboratories","ror":"https://ror.org/02nc46417","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210122684"]},{"id":"https://openalex.org/I4210143797","display_name":"Sony (Japan)","ror":"https://ror.org/04wzv3n59","country_code":"JP","type":"company","lineage":["https://openalex.org/I4210143797"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Chieh-Hsin Lai","raw_affiliation_strings":["Sony AI,Tokyo,Japan","Sony AI, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Sony AI,Tokyo,Japan","institution_ids":["https://openalex.org/I4210122684","https://openalex.org/I4210143797"]},{"raw_affiliation_string":"Sony AI, Tokyo, Japan","institution_ids":["https://openalex.org/I4210122684"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017282176","display_name":"Marco A. Mart\u00ednez-Ram\u00edrez","orcid":null},"institutions":[{"id":"https://openalex.org/I4210122684","display_name":"Sony Computer Science Laboratories","ror":"https://ror.org/02nc46417","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210122684"]},{"id":"https://openalex.org/I4210143797","display_name":"Sony (Japan)","ror":"https://ror.org/04wzv3n59","country_code":"JP","type":"company","lineage":["https://openalex.org/I4210143797"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Marco A. Mart\u00ednez-Ram\u00edrez","raw_affiliation_strings":["Sony AI,Tokyo,Japan","Sony AI, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Sony AI,Tokyo,Japan","institution_ids":["https://openalex.org/I4210122684","https://openalex.org/I4210143797"]},{"raw_affiliation_string":"Sony AI, Tokyo, Japan","institution_ids":["https://openalex.org/I4210122684"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103537853","display_name":"Wei\u2010Hsiang Liao","orcid":"https://orcid.org/0000-0003-4113-1894"},"institutions":[{"id":"https://openalex.org/I4210143797","display_name":"Sony (Japan)","ror":"https://ror.org/04wzv3n59","country_code":"JP","type":"company","lineage":["https://openalex.org/I4210143797"]},{"id":"https://openalex.org/I4210122684","display_name":"Sony Computer Science Laboratories","ror":"https://ror.org/02nc46417","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210122684"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Wei-Hsiang Liao","raw_affiliation_strings":["Sony AI,Tokyo,Japan","Sony AI, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Sony AI,Tokyo,Japan","institution_ids":["https://openalex.org/I4210122684","https://openalex.org/I4210143797"]},{"raw_affiliation_string":"Sony AI, Tokyo, Japan","institution_ids":["https://openalex.org/I4210122684"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088754502","display_name":"Yuki Mitsufuji","orcid":"https://orcid.org/0000-0002-6806-6140"},"institutions":[{"id":"https://openalex.org/I4210143797","display_name":"Sony (Japan)","ror":"https://ror.org/04wzv3n59","country_code":"JP","type":"company","lineage":["https://openalex.org/I4210143797"]},{"id":"https://openalex.org/I4210122684","display_name":"Sony Computer Science Laboratories","ror":"https://ror.org/02nc46417","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210122684"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yuki Mitsufuji","raw_affiliation_strings":["Sony AI,Tokyo,Japan","Sony AI, Tokyo, Japan","Sony Group Corporation, Tokyo, Japan"],"affiliations":[{"raw_affiliation_string":"Sony AI,Tokyo,Japan","institution_ids":["https://openalex.org/I4210122684","https://openalex.org/I4210143797"]},{"raw_affiliation_string":"Sony AI, Tokyo, Japan","institution_ids":["https://openalex.org/I4210122684"]},{"raw_affiliation_string":"Sony Group Corporation, Tokyo, Japan","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5048090174"],"corresponding_institution_ids":["https://openalex.org/I255234318"],"apc_list":null,"apc_paid":null,"fwci":1.7227,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.83154049,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"596","last_page":"600"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11304","display_name":"Advanced Neuroimaging Techniques and Applications","score":0.989799976348877,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T11304","display_name":"Advanced Neuroimaging Techniques and Applications","score":0.989799976348877,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9854000210762024,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11206","display_name":"Model Reduction and Neural Networks","score":0.9835000038146973,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7392950654029846},{"id":"https://openalex.org/keywords/bandwidth","display_name":"Bandwidth (computing)","score":0.48110145330429077},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.47228556871414185},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.46753212809562683},{"id":"https://openalex.org/keywords/distortion","display_name":"Distortion (music)","score":0.4413797855377197},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.43216824531555176},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.1205199658870697},{"id":"https://openalex.org/keywords/detector","display_name":"Detector","score":0.08220347762107849}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7392950654029846},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.48110145330429077},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.47228556871414185},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.46753212809562683},{"id":"https://openalex.org/C126780896","wikidata":"https://www.wikidata.org/wiki/Q899871","display_name":"Distortion (music)","level":4,"score":0.4413797855377197},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.43216824531555176},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.1205199658870697},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.08220347762107849},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C194257627","wikidata":"https://www.wikidata.org/wiki/Q211554","display_name":"Amplifier","level":3,"score":0.0},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10446423","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10446423","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5600000023841858,"display_name":"Sustainable cities and communities","id":"https://metadata.un.org/sdg/11"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W1505878979","https://openalex.org/W2067709094","https://openalex.org/W2740337320","https://openalex.org/W2963952344","https://openalex.org/W2964058413","https://openalex.org/W2972478942","https://openalex.org/W3004970274","https://openalex.org/W3043119899","https://openalex.org/W3181854487","https://openalex.org/W3191805365","https://openalex.org/W3196947919","https://openalex.org/W4285412734","https://openalex.org/W4297841477","https://openalex.org/W4298184813","https://openalex.org/W4298310324","https://openalex.org/W4312497550","https://openalex.org/W4360890968","https://openalex.org/W4372260037","https://openalex.org/W4372260387","https://openalex.org/W4372341905","https://openalex.org/W4390873784","https://openalex.org/W6679045638","https://openalex.org/W6713555227","https://openalex.org/W6741681139","https://openalex.org/W6779823529","https://openalex.org/W6786375611","https://openalex.org/W6809940947","https://openalex.org/W6810793953","https://openalex.org/W6838452192","https://openalex.org/W6842856065","https://openalex.org/W6844364769","https://openalex.org/W6849909072","https://openalex.org/W6850266965"],"related_works":["https://openalex.org/W2328889547","https://openalex.org/W3003858543","https://openalex.org/W1969547578","https://openalex.org/W4312306082","https://openalex.org/W2378029901","https://openalex.org/W2171488351","https://openalex.org/W3023793255","https://openalex.org/W2358848605","https://openalex.org/W4236414678","https://openalex.org/W2382477741"],"abstract_inverted_index":{"Restoring":[0],"degraded":[1],"music":[2,12,16,115],"signals":[3],"is":[4],"essential":[5],"to":[6,64,120],"enhance":[7],"audio":[8,127],"quality":[9],"for":[10,89,122],"downstream":[11],"manipulation.":[13],"Recent":[14],"diffusion-based":[15],"restoration":[17,41,116],"methods":[18,88,110],"have":[19],"demonstrated":[20],"impressive":[21],"performance,":[22],"and":[23,60,79,93,102],"among":[24],"them,":[25],"diffusion":[26,71],"posterior":[27],"sampling":[28],"(DPS)":[29],"stands":[30],"out":[31],"given":[32],"its":[33],"intrinsic":[34],"properties,":[35],"making":[36],"it":[37],"versatile":[38],"across":[39],"various":[40,98],"tasks.":[42],"In":[43,106],"this":[44],"paper,":[45],"we":[46],"identify":[47],"that":[48],"there":[49],"are":[50],"potential":[51],"issues":[52,67],"which":[53],"will":[54],"degrade":[55],"current":[56,113],"DPS-based":[57,114],"methods\u2019":[58],"performance":[59],"introduce":[61],"the":[62,66,75,80,90,112,125],"way":[63],"mitigate":[65],"inspired":[68],"by":[69],"diverse":[70],"guidance":[72],"techniques":[73],"including":[74],"RePaint":[76],"(RP)":[77],"strategy":[78],"Pseudoinverse-Guided":[81],"Diffusion":[82],"Models":[83],"(\u03a0GDM).":[84],"We":[85,118],"demonstrate":[86],"our":[87,109],"vocal":[91],"declipping":[92],"bandwidth":[94],"extension":[95],"tasks":[96],"under":[97],"levels":[99],"of":[100,124],"distortion":[101],"cutoff":[103],"frequency,":[104],"respectively.":[105],"both":[107],"tasks,":[108],"outperform":[111],"benchmarks.":[117],"refer":[119],"http://carlosholivan.github.io/demos/audio-restoration-2023.html":[121],"examples":[123],"restored":[126],"samples.":[128]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
