{"id":"https://openalex.org/W4375868990","doi":"https://doi.org/10.1109/icassp49357.2023.10095666","title":"Perceive and Predict: Self-Supervised Speech Representation Based Loss Functions for Speech Enhancement","display_name":"Perceive and Predict: Self-Supervised Speech Representation Based Loss Functions for Speech Enhancement","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4375868990","doi":"https://doi.org/10.1109/icassp49357.2023.10095666"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10095666","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10095666","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5012132420","display_name":"George Close","orcid":"https://orcid.org/0000-0002-9478-5421"},"institutions":[{"id":"https://openalex.org/I91136226","display_name":"University of Sheffield","ror":"https://ror.org/05krs5044","country_code":"GB","type":"education","lineage":["https://openalex.org/I91136226"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"George Close","raw_affiliation_strings":["University of Sheffield,Department of Computer Science","Department of Computer Science, University of Sheffield"],"affiliations":[{"raw_affiliation_string":"University of Sheffield,Department of Computer Science","institution_ids":["https://openalex.org/I91136226"]},{"raw_affiliation_string":"Department of Computer Science, University of Sheffield","institution_ids":["https://openalex.org/I91136226"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004802529","display_name":"William Ravenscroft","orcid":"https://orcid.org/0000-0002-0780-3303"},"institutions":[{"id":"https://openalex.org/I91136226","display_name":"University of Sheffield","ror":"https://ror.org/05krs5044","country_code":"GB","type":"education","lineage":["https://openalex.org/I91136226"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"William Ravenscroft","raw_affiliation_strings":["University of Sheffield,Department of Computer Science","Department of Computer Science, University of Sheffield"],"affiliations":[{"raw_affiliation_string":"University of Sheffield,Department of Computer Science","institution_ids":["https://openalex.org/I91136226"]},{"raw_affiliation_string":"Department of Computer Science, University of Sheffield","institution_ids":["https://openalex.org/I91136226"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030528300","display_name":"Thomas Hain","orcid":"https://orcid.org/0000-0003-0939-3464"},"institutions":[{"id":"https://openalex.org/I91136226","display_name":"University of Sheffield","ror":"https://ror.org/05krs5044","country_code":"GB","type":"education","lineage":["https://openalex.org/I91136226"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Thomas Hain","raw_affiliation_strings":["University of Sheffield,Department of Computer Science","Department of Computer Science, University of Sheffield"],"affiliations":[{"raw_affiliation_string":"University of Sheffield,Department of Computer Science","institution_ids":["https://openalex.org/I91136226"]},{"raw_affiliation_string":"Department of Computer Science, University of Sheffield","institution_ids":["https://openalex.org/I91136226"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5027797344","display_name":"Stefan Goetze","orcid":"https://orcid.org/0000-0003-1044-7343"},"institutions":[{"id":"https://openalex.org/I91136226","display_name":"University of Sheffield","ror":"https://ror.org/05krs5044","country_code":"GB","type":"education","lineage":["https://openalex.org/I91136226"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Stefan Goetze","raw_affiliation_strings":["University of Sheffield,Department of Computer Science","Department of Computer Science, University of Sheffield"],"affiliations":[{"raw_affiliation_string":"University of Sheffield,Department of Computer Science","institution_ids":["https://openalex.org/I91136226"]},{"raw_affiliation_string":"Department of Computer Science, University of Sheffield","institution_ids":["https://openalex.org/I91136226"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5012132420"],"corresponding_institution_ids":["https://openalex.org/I91136226"],"apc_list":null,"apc_paid":null,"fwci":1.6541,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.84259557,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10283","display_name":"Hearing Loss and Rehabilitation","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pesq","display_name":"PESQ","score":0.8879821300506592},{"id":"https://openalex.org/keywords/intelligibility","display_name":"Intelligibility (philosophy)","score":0.7625539302825928},{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.7521525025367737},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7282631397247314},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6887205243110657},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.6426193118095398},{"id":"https://openalex.org/keywords/mean-opinion-score","display_name":"Mean opinion score","score":0.6269277334213257},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5096395015716553},{"id":"https://openalex.org/keywords/psqm","display_name":"PSQM","score":0.5049603581428528},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.42202699184417725},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.3957447409629822},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.359050452709198},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3258514702320099},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.1259092390537262},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.06996825337409973},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.06748244166374207}],"concepts":[{"id":"https://openalex.org/C103734657","wikidata":"https://www.wikidata.org/wiki/Q2739975","display_name":"PESQ","level":4,"score":0.8879821300506592},{"id":"https://openalex.org/C60048801","wikidata":"https://www.wikidata.org/wiki/Q1433889","display_name":"Intelligibility (philosophy)","level":2,"score":0.7625539302825928},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.7521525025367737},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7282631397247314},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6887205243110657},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.6426193118095398},{"id":"https://openalex.org/C62897895","wikidata":"https://www.wikidata.org/wiki/Q1915482","display_name":"Mean opinion score","level":3,"score":0.6269277334213257},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5096395015716553},{"id":"https://openalex.org/C108699837","wikidata":"https://www.wikidata.org/wiki/Q7120750","display_name":"PSQM","level":4,"score":0.5049603581428528},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.42202699184417725},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.3957447409629822},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.359050452709198},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3258514702320099},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.1259092390537262},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.06996825337409973},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.06748244166374207},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10095666","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10095666","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.5400000214576721}],"awards":[],"funders":[{"id":"https://openalex.org/F4320314731","display_name":"UK Research and Innovation","ror":"https://ror.org/001aqnf71"},{"id":"https://openalex.org/F4320338440","display_name":"HORIZON EUROPE Health","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1522301498","https://openalex.org/W1552314771","https://openalex.org/W2141998673","https://openalex.org/W2757519008","https://openalex.org/W2896457183","https://openalex.org/W2897312934","https://openalex.org/W2962935966","https://openalex.org/W2963045393","https://openalex.org/W2964058413","https://openalex.org/W2971417062","https://openalex.org/W3008762051","https://openalex.org/W3015191643","https://openalex.org/W3036601975","https://openalex.org/W3086154751","https://openalex.org/W3167533889","https://openalex.org/W3169320628","https://openalex.org/W3196475561","https://openalex.org/W3197284240","https://openalex.org/W3197580070","https://openalex.org/W3197912330","https://openalex.org/W3207551191","https://openalex.org/W3209141406","https://openalex.org/W3213029956","https://openalex.org/W4221144124","https://openalex.org/W4280557595","https://openalex.org/W4296068997","https://openalex.org/W4297841357","https://openalex.org/W4312959668","https://openalex.org/W4385245566","https://openalex.org/W6739901393","https://openalex.org/W6755207826","https://openalex.org/W6780218876"],"related_works":["https://openalex.org/W2144085034","https://openalex.org/W2988616598","https://openalex.org/W59212688","https://openalex.org/W2295525327","https://openalex.org/W2098596704","https://openalex.org/W2060131684","https://openalex.org/W2243478085","https://openalex.org/W4287113595","https://openalex.org/W3176271478","https://openalex.org/W1893647155"],"abstract_inverted_index":{"Recent":[0],"work":[1,30,68],"in":[2,18,57],"the":[3,10,19,34,47,73,76,119],"domain":[4],"of":[5,12,21,28,39,53,79,90,121,147],"speech":[6,14,23,42,83,91,135,148],"enhancement":[7,24,136],"has":[8],"explored":[9],"use":[11,52,120],"self-supervised":[13],"representations":[15,56],"to":[16],"aid":[17],"training":[20],"neural":[22],"models.":[25],"However,":[26],"much":[27],"this":[29,67,107],"focuses":[31],"on":[32],"using":[33,106,140],"deepest":[35],"or":[36],"final":[37],"outputs":[38],"self":[40,54],"supervised":[41,55],"representation":[43],"models,":[44],"rather":[45],"than":[46],"earlier":[48],"feature":[49,77],"encodings.":[50],"The":[51],"such":[58,143],"a":[59,110],"way":[60],"is":[61,70,138],"often":[62],"not":[63],"fully":[64],"motivated.":[65],"In":[66],"it":[69],"shown":[71],"that":[72],"distance":[74,108,124],"between":[75],"encodings":[78],"clean":[80],"and":[81,93,115,151],"noisy":[82],"correlate":[84],"strongly":[85],"with":[86,98],"psychoacoustically":[87],"motivated":[88],"measures":[89,142],"quality":[92,149],"intelligibility,":[94],"as":[95,97,109,127,129,144],"well":[96,128],"human":[99],"Mean":[100],"Opinion":[101],"Score":[102],"(MOS)":[103],"ratings.":[104],"Experiments":[105],"loss":[111,126,132],"function":[112],"are":[113],"performed":[114],"improved":[116],"performance":[117],"over":[118],"STFT":[122],"spectrogram":[123],"based":[125],"other":[130],"common":[131],"functions":[133],"from":[134],"literature":[137],"demonstrated":[139],"objective":[141,153],"perceptual":[145],"evaluation":[146],"(PESQ)":[150],"short-time":[152],"intelligibility":[154],"(STOI).":[155]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
