{"id":"https://openalex.org/W4415432655","doi":"https://doi.org/10.21437/interspeech.2025-2725","title":"Test-Time Training for Speech Enhancement","display_name":"Test-Time Training for Speech Enhancement","publication_year":2025,"publication_date":"2025-08-17","ids":{"openalex":"https://openalex.org/W4415432655","doi":"https://doi.org/10.21437/interspeech.2025-2725"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2025-2725","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2025-2725","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2025","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2508.01847","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103064560","display_name":"A. Behera","orcid":"https://orcid.org/0000-0003-2079-7849"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Avishkar Behera","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5116594364","display_name":"Riya Ann Easow","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Riya Ann Easow","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009750308","display_name":"Venkatesh Parvathala","orcid":"https://orcid.org/0000-0001-6341-9480"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Venkatesh Parvathala","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5067617924","display_name":"K. Sri Rama Murty","orcid":"https://orcid.org/0000-0002-6355-5287"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"K. Sri Rama Murty","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5103064560"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.16630765,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"2375","last_page":"2379"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9262999892234802,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9262999892234802,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.7893000245094299},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.7871999740600586},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6603000164031982},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.527999997138977},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.508400022983551},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.484499990940094},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.4505000114440918},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.44760000705718994},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.43560001254081726},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.4275999963283539}],"concepts":[{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.7893000245094299},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.7871999740600586},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.75},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7020000219345093},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6603000164031982},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.527999997138977},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.508400022983551},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.484499990940094},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.4505000114440918},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.44760000705718994},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.43560001254081726},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.4275999963283539},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.4246000051498413},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.41589999198913574},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41290000081062317},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4018999934196472},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.37709999084472656},{"id":"https://openalex.org/C100675267","wikidata":"https://www.wikidata.org/wiki/Q1371624","display_name":"Background noise","level":2,"score":0.3589000105857849},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.3206999897956848},{"id":"https://openalex.org/C2776434776","wikidata":"https://www.wikidata.org/wiki/Q19246213","display_name":"Domain adaptation","level":3,"score":0.3199000060558319},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3025999963283539},{"id":"https://openalex.org/C104267543","wikidata":"https://www.wikidata.org/wiki/Q208163","display_name":"Signal processing","level":3,"score":0.2939999997615814},{"id":"https://openalex.org/C504749915","wikidata":"https://www.wikidata.org/wiki/Q9010971","display_name":"Speech technology","level":3,"score":0.2858999967575073},{"id":"https://openalex.org/C29265498","wikidata":"https://www.wikidata.org/wiki/Q7047719","display_name":"Noise measurement","level":3,"score":0.28119999170303345},{"id":"https://openalex.org/C103824480","wikidata":"https://www.wikidata.org/wiki/Q185889","display_name":"Time domain","level":2,"score":0.2752000093460083},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.266400009393692},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.2662999927997589},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.26339998841285706},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.2630000114440918},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.26170000433921814},{"id":"https://openalex.org/C43617652","wikidata":"https://www.wikidata.org/wiki/Q7575399","display_name":"Speech production","level":2,"score":0.26089999079704285},{"id":"https://openalex.org/C19118579","wikidata":"https://www.wikidata.org/wiki/Q786423","display_name":"Frequency domain","level":2,"score":0.25270000100135803}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.21437/interspeech.2025-2725","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2025-2725","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2025","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2508.01847","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2508.01847","pdf_url":"https://arxiv.org/pdf/2508.01847","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2508.01847","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2508.01847","pdf_url":"https://arxiv.org/pdf/2508.01847","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"This":[0,24,101],"paper":[1],"introduces":[2],"a":[3,27,33,38,78],"novel":[4],"application":[5],"of":[6,106],"Test-Time":[7],"Training":[8],"(TTT)":[9],"for":[10,68,113],"Speech":[11],"Enhancement,":[12],"addressing":[13],"the":[14,53,66,98,104],"challenges":[15],"posed":[16],"by":[17,51],"unpredictable":[18],"noise":[19],"conditions":[20],"and":[21,82,87,118],"domain":[22],"shifts.":[23],"method":[25],"combines":[26],"main":[28],"speech":[29,94,109,120],"enhancement":[30],"task":[31,36],"with":[32],"self-supervised":[34,55],"auxiliary":[35],"in":[37,108,116],"Y-shaped":[39],"architecture.":[40],"The":[41],"model":[42],"dynamically":[43],"adapts":[44],"to":[45],"new":[46],"domains":[47],"during":[48],"inference":[49],"time":[50],"optimizing":[52],"proposed":[54],"tasks":[56],"like":[57],"noise-augmented":[58],"signal":[59],"reconstruction":[60],"or":[61],"masked":[62],"spectrogram":[63],"prediction,":[64],"bypassing":[65],"need":[67],"labeled":[69],"data.":[70],"We":[71],"further":[72],"introduce":[73],"various":[74],"TTT":[75,107],"strategies":[76],"offering":[77],"trade-off":[79],"between":[80],"adaptation":[81],"efficiency.":[83],"Evaluations":[84],"across":[85,93],"synthetic":[86],"real-world":[88],"datasets":[89],"show":[90],"consistent":[91],"improvements":[92],"quality":[95],"metrics,":[96],"outperforming":[97],"baseline":[99],"model.":[100],"work":[102],"highlights":[103],"effectiveness":[105],"enhancement,":[110],"providing":[111],"insights":[112],"future":[114],"research":[115],"adaptive":[117],"robust":[119],"processing.":[121]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-23T00:00:00"}
