{"id":"https://openalex.org/W4392909975","doi":"https://doi.org/10.1109/icassp48485.2024.10448371","title":"ScoreDec: A Phase-Preserving High-Fidelity Audio Codec with a Generalized Score-Based Diffusion Post-Filter","display_name":"ScoreDec: A Phase-Preserving High-Fidelity Audio Codec with a Generalized Score-Based Diffusion Post-Filter","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392909975","doi":"https://doi.org/10.1109/icassp48485.2024.10448371"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10448371","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10448371","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003177601","display_name":"Yi-Chiao Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210143443","display_name":"Allegheny Valley Hospital","ror":"https://ror.org/038c39g95","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1310421338","https://openalex.org/I4210143443"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yi-Chiao Wu","raw_affiliation_strings":["Codec Avatars Lab,Pittsburgh,PA,USA","Codec Avatars Lab, Pittsburgh, PA, USA"],"affiliations":[{"raw_affiliation_string":"Codec Avatars Lab,Pittsburgh,PA,USA","institution_ids":["https://openalex.org/I4210143443"]},{"raw_affiliation_string":"Codec Avatars Lab, Pittsburgh, PA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003700236","display_name":"Dejan Markovi\u0107","orcid":"https://orcid.org/0000-0002-6744-7531"},"institutions":[{"id":"https://openalex.org/I4210143443","display_name":"Allegheny Valley Hospital","ror":"https://ror.org/038c39g95","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1310421338","https://openalex.org/I4210143443"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dejan Markovi\u0107","raw_affiliation_strings":["Codec Avatars Lab,Pittsburgh,PA,USA","Codec Avatars Lab, Pittsburgh, PA, USA"],"affiliations":[{"raw_affiliation_string":"Codec Avatars Lab,Pittsburgh,PA,USA","institution_ids":["https://openalex.org/I4210143443"]},{"raw_affiliation_string":"Codec Avatars Lab, Pittsburgh, PA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002770382","display_name":"Steven Krenn","orcid":null},"institutions":[{"id":"https://openalex.org/I4210143443","display_name":"Allegheny Valley Hospital","ror":"https://ror.org/038c39g95","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1310421338","https://openalex.org/I4210143443"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Steven Krenn","raw_affiliation_strings":["Codec Avatars Lab,Pittsburgh,PA,USA","Codec Avatars Lab, Pittsburgh, PA, USA"],"affiliations":[{"raw_affiliation_string":"Codec Avatars Lab,Pittsburgh,PA,USA","institution_ids":["https://openalex.org/I4210143443"]},{"raw_affiliation_string":"Codec Avatars Lab, Pittsburgh, PA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072548521","display_name":"Israel D. Gebru","orcid":null},"institutions":[{"id":"https://openalex.org/I4210143443","display_name":"Allegheny Valley Hospital","ror":"https://ror.org/038c39g95","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1310421338","https://openalex.org/I4210143443"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Israel D. Gebru","raw_affiliation_strings":["Codec Avatars Lab,Pittsburgh,PA,USA","Codec Avatars Lab, Pittsburgh, PA, USA"],"affiliations":[{"raw_affiliation_string":"Codec Avatars Lab,Pittsburgh,PA,USA","institution_ids":["https://openalex.org/I4210143443"]},{"raw_affiliation_string":"Codec Avatars Lab, Pittsburgh, PA, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5029517860","display_name":"Alexander Richard","orcid":null},"institutions":[{"id":"https://openalex.org/I4210143443","display_name":"Allegheny Valley Hospital","ror":"https://ror.org/038c39g95","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1310421338","https://openalex.org/I4210143443"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alexander Richard","raw_affiliation_strings":["Codec Avatars Lab,Pittsburgh,PA,USA","Codec Avatars Lab, Pittsburgh, PA, USA"],"affiliations":[{"raw_affiliation_string":"Codec Avatars Lab,Pittsburgh,PA,USA","institution_ids":["https://openalex.org/I4210143443"]},{"raw_affiliation_string":"Codec Avatars Lab, Pittsburgh, PA, USA","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5003177601"],"corresponding_institution_ids":["https://openalex.org/I4210143443"],"apc_list":null,"apc_paid":null,"fwci":0.7501,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.6548881,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"361","last_page":"365"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10822","display_name":"Acoustic Wave Phenomena Research","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/naturalness","display_name":"Naturalness","score":0.8843933343887329},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7658576965332031},{"id":"https://openalex.org/keywords/codec","display_name":"Codec","score":0.7131396532058716},{"id":"https://openalex.org/keywords/mean-opinion-score","display_name":"Mean opinion score","score":0.6883982419967651},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5941700339317322},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4296191334724426},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4230620265007019},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.42193302512168884},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.4208701252937317},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.17927390336990356},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.12210509181022644},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09170347452163696}],"concepts":[{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.8843933343887329},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7658576965332031},{"id":"https://openalex.org/C161765866","wikidata":"https://www.wikidata.org/wiki/Q184748","display_name":"Codec","level":2,"score":0.7131396532058716},{"id":"https://openalex.org/C62897895","wikidata":"https://www.wikidata.org/wiki/Q1915482","display_name":"Mean opinion score","level":3,"score":0.6883982419967651},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5941700339317322},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4296191334724426},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4230620265007019},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.42193302512168884},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.4208701252937317},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.17927390336990356},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.12210509181022644},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09170347452163696},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10448371","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10448371","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W1481955708","https://openalex.org/W1505878979","https://openalex.org/W1552314771","https://openalex.org/W1922168135","https://openalex.org/W1991111872","https://openalex.org/W2017957151","https://openalex.org/W2131738223","https://openalex.org/W2141998673","https://openalex.org/W2165291881","https://openalex.org/W2757519008","https://openalex.org/W2935711438","https://openalex.org/W2963182577","https://openalex.org/W2964058413","https://openalex.org/W2964073500","https://openalex.org/W2972354707","https://openalex.org/W3004970274","https://openalex.org/W3015338123","https://openalex.org/W3143595541","https://openalex.org/W3215615641","https://openalex.org/W4221144097","https://openalex.org/W4297841790","https://openalex.org/W4307323391","https://openalex.org/W4372260247","https://openalex.org/W4372270198","https://openalex.org/W4380434618","https://openalex.org/W6630442970","https://openalex.org/W6767111847","https://openalex.org/W6779823529","https://openalex.org/W6782760101","https://openalex.org/W6783182287","https://openalex.org/W6783867762","https://openalex.org/W6786375611","https://openalex.org/W6798098866"],"related_works":["https://openalex.org/W2538576716","https://openalex.org/W3155820142","https://openalex.org/W2151825755","https://openalex.org/W1193244420","https://openalex.org/W4386499642","https://openalex.org/W2504801227","https://openalex.org/W3158195778","https://openalex.org/W2143058592","https://openalex.org/W1973628965","https://openalex.org/W2019884957"],"abstract_inverted_index":{"Although":[0],"recent":[1],"mainstream":[2],"waveform-domain":[3],"end-to-end":[4],"(E2E)":[5],"neural":[6,43],"audio":[7,12,27],"codecs":[8,44,59],"achieve":[9,67],"impressive":[10],"coded":[11,24],"quality":[13,20],"with":[14,70,105,131,143],"a":[15,71,90,132],"very":[16],"low":[17],"bitrate,":[18,73],"the":[19,23,47,62,75,82,96,106,122],"gap":[21],"between":[22],"and":[25,78,84,100,118,124,137,146],"natural":[26],"is":[28,37],"still":[29],"significant.":[30],"A":[31],"generative":[32],"adversarial":[33,55],"network":[34],"(GAN)":[35],"training":[36],"usually":[38],"required":[39],"for":[40],"these":[41,58],"E2E":[42],"because":[45],"of":[46,49,81],"difficulty":[48],"direct":[50],"phase":[51,64,148],"modeling.":[52],"However,":[53],"such":[54],"learning":[56],"hinders":[57],"from":[60],"preserving":[61],"original":[63,76],"information.":[65,149],"To":[66],"human-level":[68,144],"naturalness":[69,145],"reasonable":[72],"preserve":[74],"phase,":[77],"get":[79],"rid":[80],"tricky":[83],"opaque":[85],"GAN":[86],"training,":[87],"we":[88],"develop":[89],"score-based":[91],"diffusion":[92],"post-filter":[93],"(SPF)":[94],"in":[95],"complex":[97],"spectral":[98,117],"domain":[99],"combine":[101],"our":[102],"previous":[103],"AudioDec":[104],"SPF":[107],"to":[108],"propose":[109],"ScoreDec,":[110],"which":[111],"can":[112],"be":[113],"trained":[114],"using":[115],"only":[116],"score-matching":[119],"losses.":[120],"Both":[121],"objective":[123],"subjective":[125],"experimental":[126],"results":[127],"show":[128],"that":[129],"ScoreDec":[130],"24":[133],"kbps":[134],"bitrate":[135],"encodes":[136],"decodes":[138],"full-band":[139],"48":[140],"kHz":[141],"speech":[142],"well-preserved":[147]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
