{"id":"https://openalex.org/W7134852501","doi":"https://doi.org/10.48550/arxiv.2603.08179","title":"Privacy-Preserving End-to-End Full-Duplex Speech Dialogue Models","display_name":"Privacy-Preserving End-to-End Full-Duplex Speech Dialogue Models","publication_year":2026,"publication_date":"2026-03-09","ids":{"openalex":"https://openalex.org/W7134852501","doi":"https://doi.org/10.48550/arxiv.2603.08179"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.08179","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080580365","display_name":"Nikita Kuzmin","orcid":"https://orcid.org/0000-0002-8260-7118"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Kuzmin, Nikita","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128675914","display_name":"Tao Zhong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhong, Tao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128634459","display_name":"Jiajun Deng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Deng, Jiajun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128635856","display_name":"Yingke Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Yingke","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128671020","display_name":"Tristan Tsoi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tsoi, Tristan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056072251","display_name":"Tianxiang Cao","orcid":"https://orcid.org/0009-0009-4688-2831"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cao, Tianxiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103134236","display_name":"Simon Lui","orcid":"https://orcid.org/0000-0002-0829-2867"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lui, Simon","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128657092","display_name":"Kong Aik Lee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Kong Aik","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128672134","display_name":"Eng Siong Chng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chng, Eng Siong","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5080580365"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.8492000102996826,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.8492000102996826,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.057500001043081284,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10863","display_name":"Voice and Speech Disorders","score":0.013799999840557575,"subfield":{"id":"https://openalex.org/subfields/2737","display_name":"Physiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5508999824523926},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.47870001196861267},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.46239998936653137},{"id":"https://openalex.org/keywords/leakage","display_name":"Leakage (economics)","score":0.4244000017642975},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.4239000082015991},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.3815999925136566},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.36320000886917114}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7197999954223633},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.652899980545044},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5508999824523926},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.47870001196861267},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.46239998936653137},{"id":"https://openalex.org/C2777042071","wikidata":"https://www.wikidata.org/wiki/Q6509304","display_name":"Leakage (economics)","level":2,"score":0.4244000017642975},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.4239000082015991},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.3815999925136566},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.36320000886917114},{"id":"https://openalex.org/C2778355321","wikidata":"https://www.wikidata.org/wiki/Q17079427","display_name":"Identity (music)","level":2,"score":0.3610999882221222},{"id":"https://openalex.org/C2779201187","wikidata":"https://www.wikidata.org/wiki/Q2775060","display_name":"Information leakage","level":2,"score":0.33959999680519104},{"id":"https://openalex.org/C2780378346","wikidata":"https://www.wikidata.org/wiki/Q1349983","display_name":"Leak","level":2,"score":0.3393000066280365},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3343999981880188},{"id":"https://openalex.org/C2780385302","wikidata":"https://www.wikidata.org/wiki/Q367158","display_name":"Protocol (science)","level":3,"score":0.31769999861717224},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3107999861240387},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.2827000021934509},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.2768000066280365}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.08179","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2603.08179","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.08179","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.08179","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.6802659034729004}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"End-to-end":[0],"full-duplex":[1],"speech":[2],"models":[3],"feed":[4],"user":[5],"audio":[6],"through":[7],"an":[8],"always-on":[9],"LLM":[10],"backbone,":[11],"yet":[12],"the":[13,24,35,79,108,116],"speaker":[14,44],"privacy":[15],"implications":[16],"of":[17,38,124],"their":[18],"hidden":[19,36],"representations":[20],"remain":[21],"unexamined.":[22],"Following":[23],"VoicePrivacy":[25],"2024":[26],"protocol":[27],"with":[28,61,129],"a":[29,91,96],"lazy-informed":[30],"attacker,":[31],"we":[32],"show":[33],"that":[34,55,74],"states":[37],"SALM-Duplex":[39,62],"and":[40,51,73,95],"Moshi":[41,70],"leak":[42],"substantial":[43],"identity":[45],"across":[46,58,127],"all":[47,59],"transformer":[48],"layers.":[49],"Layer-wise":[50],"turn-wise":[52],"analyses":[53],"reveal":[54],"leakage":[56,65],"persists":[57],"layers,":[60],"showing":[63],"stronger":[64],"in":[66],"early":[67],"layers":[68],"while":[69,120],"leaks":[71],"uniformly,":[72],"Linkability":[75],"rises":[76],"sharply":[77],"within":[78],"first":[80],"few":[81],"turns.":[82],"We":[83],"propose":[84],"two":[85],"streaming":[86],"anonymization":[87],"setups":[88,128],"using":[89],"Stream-Voice-Anon:":[90],"waveform-level":[92],"front-end":[93],"(Anon-W2W)":[94],"feature-domain":[97],"replacement":[98],"(Anon-W2F).":[99],"Anon-W2F":[100],"raises":[101],"EER":[102],"by":[103],"over":[104],"3.5x":[105],"relative":[106],"to":[107,113],"discrete":[109],"encoder":[110],"baseline":[111,125],"(11.2%":[112],"41.0%),":[114],"approaching":[115],"50%":[117],"random-chance":[118],"ceiling,":[119],"Anon-W2W":[121],"retains":[122],"78-93%":[123],"sBERT":[126],"sub-second":[130],"response":[131],"latency":[132],"(FRL":[133],"under":[134],"0.8":[135],"s).":[136]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-03-11T00:00:00"}
