{"id":"https://openalex.org/W7148541826","doi":"https://doi.org/10.1109/asru65441.2025.11434647","title":"MMW: Side Talk Rejection Multi-Microphone Whisper On Smart Glasses","display_name":"MMW: Side Talk Rejection Multi-Microphone Whisper On Smart Glasses","publication_year":2025,"publication_date":"2025-12-06","ids":{"openalex":"https://openalex.org/W7148541826","doi":"https://doi.org/10.1109/asru65441.2025.11434647"},"language":null,"primary_location":{"id":"doi:10.1109/asru65441.2025.11434647","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru65441.2025.11434647","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5066331204","display_name":"Yang Liu","orcid":"https://orcid.org/0000-0002-9976-8671"},"institutions":[{"id":"https://openalex.org/I4210118911","display_name":"META Group","ror":"https://ror.org/027ng0s03","country_code":"IT","type":"company","lineage":["https://openalex.org/I4210118911"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Yang Liu","raw_affiliation_strings":["Meta,US"],"affiliations":[{"raw_affiliation_string":"Meta,US","institution_ids":["https://openalex.org/I4210118911"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132790974","display_name":"Li Wan","orcid":null},"institutions":[{"id":"https://openalex.org/I4210118911","display_name":"META Group","ror":"https://ror.org/027ng0s03","country_code":"IT","type":"company","lineage":["https://openalex.org/I4210118911"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Li Wan","raw_affiliation_strings":["Meta,US"],"affiliations":[{"raw_affiliation_string":"Meta,US","institution_ids":["https://openalex.org/I4210118911"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132798179","display_name":"Yiteng Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210118911","display_name":"META Group","ror":"https://ror.org/027ng0s03","country_code":"IT","type":"company","lineage":["https://openalex.org/I4210118911"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Yiteng Huang","raw_affiliation_strings":["Meta,US"],"affiliations":[{"raw_affiliation_string":"Meta,US","institution_ids":["https://openalex.org/I4210118911"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132797820","display_name":"Yong Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210118911","display_name":"META Group","ror":"https://ror.org/027ng0s03","country_code":"IT","type":"company","lineage":["https://openalex.org/I4210118911"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Yong Xu","raw_affiliation_strings":["Meta,US"],"affiliations":[{"raw_affiliation_string":"Meta,US","institution_ids":["https://openalex.org/I4210118911"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132793316","display_name":"Yangyang Shi","orcid":null},"institutions":[{"id":"https://openalex.org/I4210118911","display_name":"META Group","ror":"https://ror.org/027ng0s03","country_code":"IT","type":"company","lineage":["https://openalex.org/I4210118911"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Yangyang Shi","raw_affiliation_strings":["Meta,US"],"affiliations":[{"raw_affiliation_string":"Meta,US","institution_ids":["https://openalex.org/I4210118911"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132803628","display_name":"Saurabh Adya","orcid":null},"institutions":[{"id":"https://openalex.org/I4210118911","display_name":"META Group","ror":"https://ror.org/027ng0s03","country_code":"IT","type":"company","lineage":["https://openalex.org/I4210118911"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Saurabh Adya","raw_affiliation_strings":["Meta,US"],"affiliations":[{"raw_affiliation_string":"Meta,US","institution_ids":["https://openalex.org/I4210118911"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132814027","display_name":"Ming Sun","orcid":null},"institutions":[{"id":"https://openalex.org/I4210118911","display_name":"META Group","ror":"https://ror.org/027ng0s03","country_code":"IT","type":"company","lineage":["https://openalex.org/I4210118911"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Ming Sun","raw_affiliation_strings":["Meta,US"],"affiliations":[{"raw_affiliation_string":"Meta,US","institution_ids":["https://openalex.org/I4210118911"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5132819523","display_name":"Florian Metze","orcid":null},"institutions":[{"id":"https://openalex.org/I4210118911","display_name":"META Group","ror":"https://ror.org/027ng0s03","country_code":"IT","type":"company","lineage":["https://openalex.org/I4210118911"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Florian Metze","raw_affiliation_strings":["Meta,US"],"affiliations":[{"raw_affiliation_string":"Meta,US","institution_ids":["https://openalex.org/I4210118911"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5066331204"],"corresponding_institution_ids":["https://openalex.org/I4210118911"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.75344438,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.6353999972343445,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.6353999972343445,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.23010000586509705,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11349","display_name":"Music Technology and Sound Studies","score":0.02019999921321869,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/fuse","display_name":"Fuse (electrical)","score":0.49149999022483826},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.47380000352859497},{"id":"https://openalex.org/keywords/layer","display_name":"Layer (electronics)","score":0.4472000002861023},{"id":"https://openalex.org/keywords/waveform","display_name":"Waveform","score":0.37220001220703125},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.3677000105381012},{"id":"https://openalex.org/keywords/interference","display_name":"Interference (communication)","score":0.3621000051498413}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7480999827384949},{"id":"https://openalex.org/C141353440","wikidata":"https://www.wikidata.org/wiki/Q182221","display_name":"Fuse (electrical)","level":2,"score":0.49149999022483826},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.47380000352859497},{"id":"https://openalex.org/C2779227376","wikidata":"https://www.wikidata.org/wiki/Q6505497","display_name":"Layer (electronics)","level":2,"score":0.4472000002861023},{"id":"https://openalex.org/C197424946","wikidata":"https://www.wikidata.org/wiki/Q1165717","display_name":"Waveform","level":3,"score":0.37220001220703125},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.3677000105381012},{"id":"https://openalex.org/C32022120","wikidata":"https://www.wikidata.org/wiki/Q797225","display_name":"Interference (communication)","level":3,"score":0.3621000051498413},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.3467999994754791},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.3310999870300293},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.3244999945163727},{"id":"https://openalex.org/C41065033","wikidata":"https://www.wikidata.org/wiki/Q2825412","display_name":"Adversary","level":2,"score":0.32179999351501465},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3131999969482422},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.3111000061035156},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.3095000088214874},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.27309998869895935},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2669000029563904},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.25929999351501465}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru65441.2025.11434647","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru65441.2025.11434647","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.46578603982925415,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W2046317813","https://openalex.org/W2060108923","https://openalex.org/W2113638573","https://openalex.org/W2128131274","https://openalex.org/W2592944988","https://openalex.org/W2810934215","https://openalex.org/W3034742263","https://openalex.org/W4214604251","https://openalex.org/W4221162997","https://openalex.org/W4385245566","https://openalex.org/W4385822775","https://openalex.org/W4387010481","https://openalex.org/W4390395706","https://openalex.org/W4392904451","https://openalex.org/W4399265137","https://openalex.org/W4402111631","https://openalex.org/W4402112153","https://openalex.org/W4402112360","https://openalex.org/W4402116591","https://openalex.org/W4404784428","https://openalex.org/W4408346050","https://openalex.org/W4415433640","https://openalex.org/W4415797881","https://openalex.org/W7133218909","https://openalex.org/W7133243644"],"related_works":[],"abstract_inverted_index":{"Smart":[0],"glasses":[1],"are":[2],"increasingly":[3],"positioned":[4],"as":[5],"the":[6,73,127,133],"nextgeneration":[7],"interface":[8],"for":[9,49],"ubiquitous":[10],"access":[11],"to":[12,31,67,91,114],"large":[13],"language":[14],"models":[15],"(LLMs).":[16],"Nevertheless,":[17],"achieving":[18],"reliable":[19],"interaction":[20],"in":[21,140],"real-world":[22],"noisy":[23,141],"environments":[24],"remains":[25],"a":[26,41,59,64,86,106],"major":[27],"challenge,":[28],"particularly":[29],"due":[30],"interference":[32],"from":[33],"side":[34,120],"speech.":[35],"In":[36],"this":[37],"work,":[38],"we":[39,57,84,104],"introduce":[40],"novel":[42],"side-talk":[43,94],"rejection":[44],"multimicrophone":[45],"Whisper":[46,101],"(MMW)":[47],"framework":[48],"smart":[50],"glasses,":[51],"incorporating":[52],"three":[53],"key":[54],"innovations.":[55],"First,":[56],"propose":[58],"Mix":[60],"Block":[61],"based":[62],"on":[63],"Tri-Mamba":[65],"architecture":[66],"effectively":[68],"fuse":[69],"multi-channel":[70],"audio":[71],"at":[72],"raw":[74],"waveform":[75],"level,":[76],"while":[77],"maintaining":[78],"compatibility":[79],"with":[80],"streaming":[81],"processing.":[82],"Second,":[83],"design":[85],"Frame":[87],"Diarization":[88],"Mamba":[89],"Layer":[90],"enhance":[92],"frame-level":[93,117],"suppression,":[95],"facilitating":[96],"more":[97],"efficient":[98],"fine-tuning":[99],"of":[100],"models.":[102],"Third,":[103],"employ":[105],"Multi-Scale":[107],"Group":[108],"Relative":[109],"Policy":[110],"Optimization":[111],"(GRPO)":[112],"strategy":[113],"jointly":[115],"optimize":[116],"and":[118],"utterance-level":[119],"speech":[121],"suppression.":[122],"Experimental":[123],"evaluations":[124],"demonstrate":[125],"that":[126],"proposed":[128],"MMW":[129],"system":[130],"can":[131],"reduce":[132],"word":[134],"error":[135],"rate":[136],"(WER)":[137],"by":[138],"4.95%":[139],"conditions.":[142]},"counts_by_year":[],"updated_date":"2026-04-03T16:44:17.987007","created_date":"2026-04-03T00:00:00"}
