{"id":"https://openalex.org/W4404341570","doi":"https://doi.org/10.1145/3729462","title":"USpeech: Ultrasound-Enhanced Speech with Minimal Human Effort via Cross-Modal Synthesis","display_name":"USpeech: Ultrasound-Enhanced Speech with Minimal Human Effort via Cross-Modal Synthesis","publication_year":2025,"publication_date":"2025-06-09","ids":{"openalex":"https://openalex.org/W4404341570","doi":"https://doi.org/10.1145/3729462"},"language":"en","primary_location":{"id":"doi:10.1145/3729462","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3729462","pdf_url":null,"source":{"id":"https://openalex.org/S4210219751","display_name":"Proceedings of the ACM on Interactive Mobile Wearable and Ubiquitous Technologies","issn_l":"2474-9567","issn":["2474-9567"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Interactive, Mobile, Wearable and Ubiquitous Technologies","raw_type":"journal-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2410.22076","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Luca Jiang-Tao Yu","orcid":"https://orcid.org/0009-0004-3964-5874"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":true,"raw_author_name":"Luca Jiang-Tao Yu","raw_affiliation_strings":["The University of Hong Kong, Hong Kong SAR, China"],"raw_orcid":"https://orcid.org/0009-0004-3964-5874","affiliations":[{"raw_affiliation_string":"The University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057284689","display_name":"Running Zhao","orcid":"https://orcid.org/0000-0003-2496-3429"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Running Zhao","raw_affiliation_strings":["The University of Hong Kong, Hong Kong SAR, China"],"raw_orcid":"https://orcid.org/0000-0003-2496-3429","affiliations":[{"raw_affiliation_string":"The University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059429176","display_name":"Sijie Ji","orcid":"https://orcid.org/0000-0002-6615-1982"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Sijie Ji","raw_affiliation_strings":["The University of Hong Kong, Hong Kong SAR, China"],"raw_orcid":"https://orcid.org/0000-0002-6615-1982","affiliations":[{"raw_affiliation_string":"The University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077317339","display_name":"Edith C.\u2010H. Ngai","orcid":"https://orcid.org/0000-0002-3454-8731"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Edith C.H. Ngai","raw_affiliation_strings":["The University of Hong Kong, Hong Kong SAR, China"],"raw_orcid":"https://orcid.org/0000-0002-3454-8731","affiliations":[{"raw_affiliation_string":"The University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I889458895"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5080848262","display_name":"Chenshu Wu","orcid":"https://orcid.org/0000-0002-9700-4627"},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Chenshu Wu","raw_affiliation_strings":["The University of Hong Kong, Hong Kong SAR, China"],"raw_orcid":"https://orcid.org/0000-0002-9700-4627","affiliations":[{"raw_affiliation_string":"The University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I889458895"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I889458895"],"apc_list":null,"apc_paid":null,"fwci":1.9057,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.85670482,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"9","issue":"2","first_page":"1","last_page":"31"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9498999714851379,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9498999714851379,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.6867860555648804},{"id":"https://openalex.org/keywords/ultrasound","display_name":"Ultrasound","score":0.540086567401886},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.46009013056755066},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4296289384365082},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.2943021357059479},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.1915270984172821},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.13070419430732727},{"id":"https://openalex.org/keywords/composite-material","display_name":"Composite material","score":0.06485068798065186}],"concepts":[{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.6867860555648804},{"id":"https://openalex.org/C143753070","wikidata":"https://www.wikidata.org/wiki/Q162564","display_name":"Ultrasound","level":2,"score":0.540086567401886},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.46009013056755066},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4296289384365082},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.2943021357059479},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.1915270984172821},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.13070419430732727},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.06485068798065186}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1145/3729462","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3729462","pdf_url":null,"source":{"id":"https://openalex.org/S4210219751","display_name":"Proceedings of the ACM on Interactive Mobile Wearable and Ubiquitous Technologies","issn_l":"2474-9567","issn":["2474-9567"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Interactive, Mobile, Wearable and Ubiquitous Technologies","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2410.22076","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.22076","pdf_url":"https://arxiv.org/pdf/2410.22076","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:hub.hku.hk:10722/359003","is_oa":false,"landing_page_url":"https://hub.hku.hk/handle/10722/359003","pdf_url":null,"source":{"id":"https://openalex.org/S4377196271","display_name":"The HKU Scholars Hub (University of Hong Kong)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I889458895","host_organization_name":"University of Hong Kong","host_organization_lineage":["https://openalex.org/I889458895"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2410.22076","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2410.22076","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2410.22076","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.22076","pdf_url":"https://arxiv.org/pdf/2410.22076","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8676120579","display_name":null,"funder_award_id":"62222216","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4404341570.pdf","grobid_xml":"https://content.openalex.org/works/W4404341570.grobid-xml"},"referenced_works_count":75,"referenced_works":["https://openalex.org/W3161004920","https://openalex.org/W2102512139","https://openalex.org/W3046258263","https://openalex.org/W2942421255","https://openalex.org/W2099494318","https://openalex.org/W2990693807","https://openalex.org/W4390872449","https://openalex.org/W2075941399","https://openalex.org/W2108598243","https://openalex.org/W4294892047","https://openalex.org/W4396833340","https://openalex.org/W4390798774","https://openalex.org/W3147539069","https://openalex.org/W2096779346","https://openalex.org/W2585824449","https://openalex.org/W1482149378","https://openalex.org/W2207110500","https://openalex.org/W4386242195","https://openalex.org/W2990503944","https://openalex.org/W2899304011","https://openalex.org/W6749863746","https://openalex.org/W2148628165","https://openalex.org/W3182657421","https://openalex.org/W3083703874","https://openalex.org/W1635512741","https://openalex.org/W2593116425","https://openalex.org/W3094630307","https://openalex.org/W4312420631","https://openalex.org/W4380928279","https://openalex.org/W3217415709","https://openalex.org/W2887783524","https://openalex.org/W2121895216","https://openalex.org/W4284889257","https://openalex.org/W2051057783","https://openalex.org/W1500831240","https://openalex.org/W3211862173","https://openalex.org/W3094550259","https://openalex.org/W2963936489","https://openalex.org/W3083323811","https://openalex.org/W4387835420","https://openalex.org/W2525798395","https://openalex.org/W3196408167","https://openalex.org/W2610272801","https://openalex.org/W2136545725","https://openalex.org/W2394945029","https://openalex.org/W2141411743","https://openalex.org/W2035576074","https://openalex.org/W2408744528","https://openalex.org/W2962960500","https://openalex.org/W4322731291","https://openalex.org/W2937484199","https://openalex.org/W2052666245","https://openalex.org/W2963729424","https://openalex.org/W4381249885","https://openalex.org/W4231807801","https://openalex.org/W2953001150","https://openalex.org/W3163652268","https://openalex.org/W3140645045","https://openalex.org/W2897106987","https://openalex.org/W3016442829","https://openalex.org/W2067295501","https://openalex.org/W2576289912","https://openalex.org/W1538023239","https://openalex.org/W3112991842","https://openalex.org/W4297095639","https://openalex.org/W3024979138","https://openalex.org/W3015338123","https://openalex.org/W4395681055","https://openalex.org/W2998161426","https://openalex.org/W2625581252","https://openalex.org/W4200186631","https://openalex.org/W4387321423","https://openalex.org/W4366547682","https://openalex.org/W4283391228","https://openalex.org/W4385965602"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Speech":[0],"enhancement":[1,20,82,156,194],"is":[2,90,197],"crucial":[3],"for":[4,18,49,80,148],"ubiquitous":[5],"human-computer":[6],"interaction.":[7],"Recently,":[8],"ultrasound-based":[9,66,192],"acoustic":[10],"sensing":[11],"has":[12],"emerged":[13],"as":[14,106],"an":[15,145],"attractive":[16],"choice":[17],"speech":[19,67,81,155,160,169,193],"because":[21],"of":[22,65,116],"its":[23,88],"superior":[24],"ubiquity":[25],"and":[26,35,52,100,120,126,143,165],"performance.":[27],"However,":[28],"due":[29],"to":[30,56,135,187],"inevitable":[31],"interference":[32],"from":[33,113],"unexpected":[34],"unintended":[36],"sources":[37],"during":[38],"audio-ultrasound":[39,146],"data":[40,50,58,185],"acquisition,":[41],"existing":[42],"solutions":[43],"rely":[44],"heavily":[45],"on":[46],"human":[47,85],"effort":[48],"collection":[51],"processing.":[53],"This":[54,109],"leads":[55],"significant":[57],"scarcity":[59],"that":[60,94,158],"limits":[61],"the":[62,96,114,121,162,167],"full":[63],"potential":[64],"enhancement.":[68],"To":[69],"address":[70],"this,":[71],"we":[72],"propose":[73],"USPEECH,":[74],"a":[75,91,107,139,154,172],"cross-modal":[76],"ultrasound":[77,127,149,184],"synthesis":[78],"framework":[79,93,130],"with":[83],"minimal":[84],"effort.":[86],"At":[87],"core":[89],"two-stage":[92],"establishes":[95],"correspondence":[97],"between":[98,124],"visual":[99],"ultrasonic":[101],"modalities":[102,137],"by":[103],"leveraging":[104],"audio":[105],"bridge.":[108],"approach":[110],"overcomes":[111],"challenges":[112],"lack":[115],"paired":[117],"video-ultrasound":[118],"datasets":[119],"inherent":[122],"heterogeneity":[123],"video":[125],"data.":[128],"Our":[129],"incorporates":[131],"contrastive":[132],"video-audio":[133],"pre-training":[134],"project":[136],"into":[138],"shared":[140],"semantic":[141],"space":[142],"employs":[144],"encoder-decoder":[147],"synthesis.":[150],"We":[151],"then":[152],"present":[153],"network":[157],"enhances":[159],"in":[161],"time-frequency":[163],"domain":[164],"recovers":[166],"clean":[168],"waveform":[170],"via":[171],"neural":[173],"vocoder.":[174],"Comprehensive":[175],"experiments":[176],"show":[177],"USpeech":[178],"achieves":[179],"remarkable":[180],"performance":[181],"using":[182],"synthetic":[183],"comparable":[186],"physical":[188],"data,":[189],"outperforming":[190],"state-of-the-art":[191],"baselines.":[195],"USPEECH":[196],"open-sourced":[198],"at":[199],"https://github.com/aiot-lab/USpeech/.":[200]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2024-11-14T00:00:00"}
