{"id":"https://openalex.org/W4392903736","doi":"https://doi.org/10.1109/icassp48485.2024.10447539","title":"A Closer Look at Wav2vec2 Embeddings for On-Device Single-Channel Speech Enhancement","display_name":"A Closer Look at Wav2vec2 Embeddings for On-Device Single-Channel Speech Enhancement","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392903736","doi":"https://doi.org/10.1109/icassp48485.2024.10447539"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10447539","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447539","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5081267420","display_name":"Ravi Shankar","orcid":"https://orcid.org/0000-0002-6641-4025"},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Ravi Shankar","raw_affiliation_strings":["Johns Hopkins University,Department of Electrical and Computer Engineering,USA","Department of Electrical and Computer Engineering, Johns Hopkins University, USA"],"affiliations":[{"raw_affiliation_string":"Johns Hopkins University,Department of Electrical and Computer Engineering,USA","institution_ids":["https://openalex.org/I145311948"]},{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Johns Hopkins University, USA","institution_ids":["https://openalex.org/I145311948"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075227645","display_name":"Ke Tan","orcid":null},"institutions":[{"id":"https://openalex.org/I4210128585","display_name":"META Health","ror":"https://ror.org/035h67p10","country_code":"US","type":"other","lineage":["https://openalex.org/I4210128585"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ke Tan","raw_affiliation_strings":["Redmond,Meta Reality Labs,Washington,USA","Meta Reality Labs, Redmond, Washington, USA"],"affiliations":[{"raw_affiliation_string":"Redmond,Meta Reality Labs,Washington,USA","institution_ids":["https://openalex.org/I4210128585"]},{"raw_affiliation_string":"Meta Reality Labs, Redmond, Washington, USA","institution_ids":["https://openalex.org/I4210128585"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040562171","display_name":"Buye Xu","orcid":"https://orcid.org/0000-0002-3027-7567"},"institutions":[{"id":"https://openalex.org/I4210128585","display_name":"META Health","ror":"https://ror.org/035h67p10","country_code":"US","type":"other","lineage":["https://openalex.org/I4210128585"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Buye Xu","raw_affiliation_strings":["Redmond,Meta Reality Labs,Washington,USA","Meta Reality Labs, Redmond, Washington, USA"],"affiliations":[{"raw_affiliation_string":"Redmond,Meta Reality Labs,Washington,USA","institution_ids":["https://openalex.org/I4210128585"]},{"raw_affiliation_string":"Meta Reality Labs, Redmond, Washington, USA","institution_ids":["https://openalex.org/I4210128585"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5080751032","display_name":"Anurag Kumar","orcid":"https://orcid.org/0000-0002-1164-144X"},"institutions":[{"id":"https://openalex.org/I4210128585","display_name":"META Health","ror":"https://ror.org/035h67p10","country_code":"US","type":"other","lineage":["https://openalex.org/I4210128585"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anurag Kumar","raw_affiliation_strings":["Redmond,Meta Reality Labs,Washington,USA","Meta Reality Labs, Redmond, Washington, USA"],"affiliations":[{"raw_affiliation_string":"Redmond,Meta Reality Labs,Washington,USA","institution_ids":["https://openalex.org/I4210128585"]},{"raw_affiliation_string":"Meta Reality Labs, Redmond, Washington, USA","institution_ids":["https://openalex.org/I4210128585"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5081267420"],"corresponding_institution_ids":["https://openalex.org/I145311948"],"apc_list":null,"apc_paid":null,"fwci":0.7471,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.65187129,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"751","last_page":"755"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10326","display_name":"Indoor and Outdoor Localization Technologies","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.8040682077407837},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7495341897010803},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.706246018409729},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6588424444198608},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.6433645486831665},{"id":"https://openalex.org/keywords/channel","display_name":"Channel (broadcasting)","score":0.5943201780319214},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.5284621119499207},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.4687842130661011},{"id":"https://openalex.org/keywords/footprint","display_name":"Footprint","score":0.44519197940826416},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.4323594570159912},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.36965858936309814},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09363612532615662},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.0761406421661377},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.07372379302978516}],"concepts":[{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.8040682077407837},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7495341897010803},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.706246018409729},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6588424444198608},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.6433645486831665},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.5943201780319214},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.5284621119499207},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.4687842130661011},{"id":"https://openalex.org/C132943942","wikidata":"https://www.wikidata.org/wiki/Q2562511","display_name":"Footprint","level":2,"score":0.44519197940826416},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.4323594570159912},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36965858936309814},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09363612532615662},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0761406421661377},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.07372379302978516},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10447539","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447539","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W2033594129","https://openalex.org/W2406737436","https://openalex.org/W2842698324","https://openalex.org/W2914304175","https://openalex.org/W2964058413","https://openalex.org/W2991361823","https://openalex.org/W3015337486","https://openalex.org/W3031135612","https://openalex.org/W3036601975","https://openalex.org/W3094932283","https://openalex.org/W3096408984","https://openalex.org/W3122264812","https://openalex.org/W3160085755","https://openalex.org/W3160320508","https://openalex.org/W3167533889","https://openalex.org/W3197042120","https://openalex.org/W3209059054","https://openalex.org/W3209984917","https://openalex.org/W4200047557","https://openalex.org/W4221144097","https://openalex.org/W4224933800","https://openalex.org/W4226219411","https://openalex.org/W4226403810","https://openalex.org/W4281820413","https://openalex.org/W4292969786","https://openalex.org/W4297841357","https://openalex.org/W4301371414","https://openalex.org/W4380434618","https://openalex.org/W6738884980","https://openalex.org/W6757632829","https://openalex.org/W6780218876","https://openalex.org/W6783867762","https://openalex.org/W6803547063"],"related_works":["https://openalex.org/W2120771489","https://openalex.org/W2051376034","https://openalex.org/W2294333436","https://openalex.org/W2955597484","https://openalex.org/W3110551121","https://openalex.org/W2653598178","https://openalex.org/W2747006289","https://openalex.org/W2072884270","https://openalex.org/W4252682934","https://openalex.org/W2089240210"],"abstract_inverted_index":{"Self-supervised":[0],"learned":[1],"models":[2,94],"have":[3,61],"been":[4],"found":[5],"to":[6,30,96],"be":[7,31],"very":[8],"effective":[9],"for":[10,48],"tasks":[11],"such":[12,93],"as":[13],"automatic":[14],"speech":[15,25,50,73],"recognition,":[16],"speaker":[17],"identification,":[18],"and":[19,34,55,79],"others.":[20],"However,":[21],"their":[22],"utility":[23],"in":[24,52],"enhancement":[26,51,64,74],"systems":[27],"is":[28],"yet":[29],"firmly":[32],"established,":[33],"perhaps":[35],"slightly":[36],"misunderstood.":[37],"In":[38],"this":[39],"paper,":[40],"we":[41,86],"investigate":[42],"the":[43,57,63,80],"uses":[44],"of":[45],"SSL":[46],"representations":[47],"single-channel":[49],"challenging":[53],"conditions":[54,91],"establish":[56],"impact":[58],"they":[59],"can":[60],"on":[62,88],"task.":[65],"Our":[66],"constraints":[67],"are":[68],"designed":[69],"around":[70],"on-device":[71],"real-time":[72],"\u2013":[75],"model":[76],"being":[77,83],"causal,":[78],"compute":[81],"footprint":[82],"small.":[84],"Additionally,":[85],"focus":[87],"low":[89],"SNR":[90],"where":[92],"struggle":[95],"provide":[97],"good":[98],"performance.":[99]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
