{"id":"https://openalex.org/W7128727177","doi":"https://doi.org/10.48550/arxiv.2602.10666","title":"From Diet to Free Lunch: Estimating Auxiliary Signal Properties using Dynamic Pruning Masks in Speech Enhancement Networks","display_name":"From Diet to Free Lunch: Estimating Auxiliary Signal Properties using Dynamic Pruning Masks in Speech Enhancement Networks","publication_year":2026,"publication_date":"2026-02-11","ids":{"openalex":"https://openalex.org/W7128727177","doi":"https://doi.org/10.48550/arxiv.2602.10666"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.10666","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.10666","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.10666","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087077768","display_name":"Riccardo Miccini","orcid":"https://orcid.org/0000-0002-0421-6170"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Miccini, Riccardo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125691245","display_name":"Cl\u00e9ment Laroche","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Laroche, Cl\u00e9ment","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018887201","display_name":"Tobias Piechowiak","orcid":"https://orcid.org/0000-0002-6446-5208"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Piechowiak, Tobias","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018416804","display_name":"Xenofon Fafoutis","orcid":"https://orcid.org/0000-0002-9871-0013"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fafoutis, Xenofon","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5033388505","display_name":"Luca Pezzarossa","orcid":"https://orcid.org/0000-0002-0863-2526"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pezzarossa, Luca","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5087077768"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9731000065803528,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9731000065803528,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.006500000134110451,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.0035000001080334187,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.6991000175476074},{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.6625999808311462},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5597000122070312},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5321999788284302},{"id":"https://openalex.org/keywords/signal","display_name":"SIGNAL (programming language)","score":0.5069000124931335},{"id":"https://openalex.org/keywords/binary-number","display_name":"Binary number","score":0.4715000092983246},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.45489999651908875},{"id":"https://openalex.org/keywords/channel","display_name":"Channel (broadcasting)","score":0.44519999623298645},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.36959999799728394}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7705000042915344},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.6991000175476074},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.6625999808311462},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5597000122070312},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5454000234603882},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5321999788284302},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.5069000124931335},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5055000185966492},{"id":"https://openalex.org/C48372109","wikidata":"https://www.wikidata.org/wiki/Q3913","display_name":"Binary number","level":2,"score":0.4715000092983246},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.45489999651908875},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.44519999623298645},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3806999921798706},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.36959999799728394},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.35179999470710754},{"id":"https://openalex.org/C13944312","wikidata":"https://www.wikidata.org/wiki/Q7512748","display_name":"Signal-to-noise ratio (imaging)","level":2,"score":0.3343000113964081},{"id":"https://openalex.org/C100675267","wikidata":"https://www.wikidata.org/wiki/Q1371624","display_name":"Background noise","level":2,"score":0.3328999876976013},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.33160001039505005},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.3314000070095062},{"id":"https://openalex.org/C104267543","wikidata":"https://www.wikidata.org/wiki/Q208163","display_name":"Signal processing","level":3,"score":0.31520000100135803},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.30730000138282776},{"id":"https://openalex.org/C157524613","wikidata":"https://www.wikidata.org/wiki/Q2828883","display_name":"Fine-tuning","level":2,"score":0.2985999882221222},{"id":"https://openalex.org/C29265498","wikidata":"https://www.wikidata.org/wiki/Q7047719","display_name":"Noise measurement","level":3,"score":0.29109999537467957},{"id":"https://openalex.org/C3020402766","wikidata":"https://www.wikidata.org/wiki/Q104376712","display_name":"Prior information","level":2,"score":0.2870999872684479},{"id":"https://openalex.org/C66905080","wikidata":"https://www.wikidata.org/wiki/Q17005494","display_name":"Binary classification","level":3,"score":0.25999999046325684},{"id":"https://openalex.org/C82142266","wikidata":"https://www.wikidata.org/wiki/Q3456604","display_name":"Dynamic Bayesian network","level":3,"score":0.25839999318122864},{"id":"https://openalex.org/C2779190172","wikidata":"https://www.wikidata.org/wiki/Q4913888","display_name":"Binary data","level":3,"score":0.2524999976158142},{"id":"https://openalex.org/C46637626","wikidata":"https://www.wikidata.org/wiki/Q6693015","display_name":"Low latency (capital markets)","level":2,"score":0.25029999017715454}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.10666","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.10666","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.10666","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.10666","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.47476691007614136,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Speech":[0],"Enhancement":[1],"(SE)":[2],"in":[3],"audio":[4],"devices":[5],"is":[6,46,143],"often":[7,37],"supported":[8],"by":[9,71],"auxiliary":[10],"modules":[11],"for":[12,102,181],"Voice":[13],"Activity":[14],"Detection":[15],"(VAD),":[16],"SNR":[17],"estimation,":[18],"or":[19],"Acoustic":[20],"Scene":[21],"Classification":[22],"to":[23,68,113,135,163],"ensure":[24],"robust":[25],"context-aware":[26],"behavior":[27,152],"and":[28,56,122,174,184],"seamless":[29],"user":[30],"experience.":[31],"Just":[32],"like":[33],"SE,":[34],"these":[35,94],"tasks":[36],"employ":[38],"deep":[39],"learning;":[40,168],"however,":[41],"deploying":[42],"additional":[43,54],"models":[44,155],"on-device":[45],"computationally":[47],"impractical,":[48],"whereas":[49],"cloud-based":[50],"inference":[51],"would":[52],"introduce":[53],"latency":[55],"compromise":[57],"privacy.":[58],"Prior":[59],"work":[60],"on":[61,77,116,119,127,145,169],"SE":[62,183],"employed":[63],"Dynamic":[64],"Channel":[65],"Pruning":[66],"(DynCP)":[67],"reduce":[69,134],"computation":[70],"adaptively":[72],"disabling":[73],"specific":[74],"channels":[75],"based":[76],"the":[78,100,150,157,170],"current":[79],"input.":[80],"In":[81],"this":[82],"work,":[83],"we":[84,148,172],"investigate":[85],"whether":[86],"useful":[87],"signal":[88,188],"properties":[89],"can":[90],"be":[91],"estimated":[92],"from":[93],"internal":[95],"pruning":[96],"masks,":[97,132],"thus":[98],"removing":[99],"need":[101],"separate":[103],"models.":[104],"We":[105],"show":[106],"that":[107],"simple,":[108],"interpretable":[109],"predictors":[110],"achieve":[111],"up":[112],"93%":[114],"accuracy":[115],"VAD,":[117],"84%":[118],"noise":[120],"classification,":[121],"an":[123],"R2":[124],"of":[125,153,159,187],"0.86":[126],"F0":[128],"estimation.":[129],"With":[130],"binary":[131],"predictions":[133],"weighted":[136],"sums,":[137],"inducing":[138],"negligible":[139],"overhead.":[140],"Our":[141],"contribution":[142],"twofold:":[144],"one":[146],"hand,":[147],"examine":[149],"emergent":[151],"DynCP":[154,176],"through":[156],"lens":[158],"downstream":[160],"prediction":[161],"tasks,":[162],"reveal":[164],"what":[165],"they":[166],"are":[167],"other,":[171],"repurpose":[173],"re-propose":[175],"as":[177],"a":[178],"holistic":[179],"solution":[180],"efficient":[182],"simultaneous":[185],"estimation":[186],"properties.":[189]},"counts_by_year":[],"updated_date":"2026-02-13T13:40:29.240086","created_date":"2026-02-13T00:00:00"}
