{"id":"https://openalex.org/W7160320123","doi":"https://doi.org/10.48550/arxiv.2605.02718","title":"Private Speech Classification without Collapse: Stabilized DP Training and Offline Distillation","display_name":"Private Speech Classification without Collapse: Stabilized DP Training and Offline Distillation","publication_year":2026,"publication_date":"2026-05-04","ids":{"openalex":"https://openalex.org/W7160320123","doi":"https://doi.org/10.48550/arxiv.2605.02718"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.02718","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.02718","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.02718","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5135319587","display_name":"Yadi Wen","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wen, Yadi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135309231","display_name":"Tianxin Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Tianxin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135407534","display_name":"Enji Liang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liang, Enji","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100541802","display_name":"Rong Du","orcid":"https://orcid.org/0000-0002-5478-896X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Du, Rong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5024796752","display_name":"Yue Fu","orcid":"https://orcid.org/0000-0002-5347-7211"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fu, Yue","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5135319587"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.5041999816894531,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.5041999816894531,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.11219999939203262,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.0763000026345253,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.6786999702453613},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.5598999857902527},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.5407000184059143},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.5375000238418579},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.41839998960494995},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.4142000079154968},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.413100004196167},{"id":"https://openalex.org/keywords/online-and-offline","display_name":"Online and offline","score":0.4099999964237213}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7419000267982483},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.6786999702453613},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.5598999857902527},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.5407000184059143},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.5375000238418579},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43720000982284546},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.41839998960494995},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.4142000079154968},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.413100004196167},{"id":"https://openalex.org/C2780102126","wikidata":"https://www.wikidata.org/wiki/Q10928179","display_name":"Online and offline","level":2,"score":0.4099999964237213},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.40220001339912415},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3928999900817871},{"id":"https://openalex.org/C2776848632","wikidata":"https://www.wikidata.org/wiki/Q853463","display_name":"Clipping (morphology)","level":2,"score":0.3910999894142151},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.3871000111103058},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.3677000105381012},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.34450000524520874},{"id":"https://openalex.org/C48677424","wikidata":"https://www.wikidata.org/wiki/Q6888088","display_name":"Mode (computer interface)","level":2,"score":0.3276999890804291},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3215999901294708},{"id":"https://openalex.org/C99221444","wikidata":"https://www.wikidata.org/wiki/Q1532069","display_name":"Private information retrieval","level":2,"score":0.31700000166893005},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.30309998989105225},{"id":"https://openalex.org/C151201525","wikidata":"https://www.wikidata.org/wiki/Q177239","display_name":"Limit (mathematics)","level":2,"score":0.2777999937534332},{"id":"https://openalex.org/C2776207758","wikidata":"https://www.wikidata.org/wiki/Q5303302","display_name":"Downstream (manufacturing)","level":2,"score":0.275299996137619},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2529999911785126},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.2524999976158142}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.02718","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.02718","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.02718","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.02718","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.5912108421325684}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,87,192,224],"study":[1],"example-level":[2],"private":[3,57,110],"supervised":[4],"speech":[5,30],"classification":[6],"under":[7,125,204,208],"a":[8,45,62,76,80,94,108,130,135,149,228],"practical":[9],"release":[10,43,105],"constraint:":[11],"training":[12,72],"may":[13,73],"access":[14],"privileged":[15,215],"side":[16,36],"information,":[17],"but":[18],"the":[19,56,117,163,182],"released":[20,183],"model":[21,48],"must":[22],"be":[23],"audio-only.":[24],"This":[25,98],"setting":[26,106,124,195],"is":[27,100],"important":[28],"because":[29,107],"systems":[31],"can":[32,85],"often":[33],"exploit":[34],"richer":[35],"information":[37],"during":[38],"development,":[39],"whereas":[40],"deployment":[41],"and":[42,93,142,179,210,220,240],"require":[44],"lightweight":[46],"unimodal":[47],"with":[49,185,227],"auditable":[50],"privacy":[51,180],"guarantees.":[52],"Using":[53],"DP-SGD":[54],"on":[55,68,140,148,214],"dataset":[58,153],"$D_{\\text{priv}}$,":[59,141],"we":[60,128,172],"identify":[61],"strong-privacy":[63],"failure":[64,99],"mode":[65],"($\u03b5\\le":[66],"1$)":[67],"imbalanced":[69],"tasks,":[70],"where":[71],"collapse":[74,96],"to":[75,170,187],"near":[77],"single-class":[78],"predictor,":[79],"phenomenon":[81],"that":[82],"overall":[83],"accuracy":[84],"obscure.":[86],"therefore":[88],"emphasize":[89],"Macro-F1,":[90],"balanced":[91],"accuracy,":[92],"simple":[95],"diagnostic.":[97],"especially":[101],"problematic":[102],"in":[103],"our":[104],"collapsed":[109],"teacher":[111,139,158,212],"cannot":[112],"provide":[113],"useful":[114],"supervision":[115],"for":[116,177],"downstream":[118],"audio-only":[119,146],"student.":[120,164],"To":[121],"address":[122,225],"this":[123,194],"strong":[126],"privacy,":[127],"propose":[129],"two-stage":[131],"protocol:":[132],"(i)":[133],"train":[134],"(possibly":[136],"multimodal)":[137],"DP":[138,166,175],"(ii)":[143],"distill":[144],"an":[145],"student":[147,184],"fixed,":[150],"recording-disjoint":[151],"auxiliary":[152],"$D_{\\text{aux}}$":[154],"using":[155],"one-shot":[156],"offline":[157,241],"probability":[159],"outputs,":[160],"releasing":[161],"only":[162,169],"The":[165],"guarantee":[167],"applies":[168],"$D_{\\text{priv}}$;":[171],"make":[173],"no":[174],"claim":[176],"$D_{\\text{aux}}$,":[178],"of":[181],"respect":[186],"$D_{\\text{priv}}$":[188],"follows":[189],"by":[190],"post-processing.":[191],"frame":[193],"as":[196],"involving":[197],"four":[198],"coupled":[199],"bottlenecks:":[200],"speech-induced":[201],"optimization":[202],"instability":[203],"DP-SGD,":[205],"minority-class":[206],"erosion":[207],"clipping":[209],"noise,":[211],"over-reliance":[213],"modalities":[216],"unavailable":[217],"at":[218],"deployment,":[219],"train--deploy":[221],"modality":[222],"mismatch.":[223],"them":[226],"DP-stabilizing":[229],"acoustic":[230],"front-end":[231],"(DSAF),":[232],"minibatch-adaptive":[233],"bounded":[234],"loss":[235],"reweighting":[236],"(AW-DP),":[237],"privileged-modality":[238],"dropout,":[239],"teacher-to-student":[242],"distillation.":[243]},"counts_by_year":[],"updated_date":"2026-05-06T06:10:43.113611","created_date":"2026-05-06T00:00:00"}
