{"id":"https://openalex.org/W4416251818","doi":"https://doi.org/10.1109/waspaa66052.2025.11230957","title":"TGIF: Talker Group-Informed Familiarization of Target Speaker Extraction","display_name":"TGIF: Talker Group-Informed Familiarization of Target Speaker Extraction","publication_year":2025,"publication_date":"2025-10-12","ids":{"openalex":"https://openalex.org/W4416251818","doi":"https://doi.org/10.1109/waspaa66052.2025.11230957"},"language":null,"primary_location":{"id":"doi:10.1109/waspaa66052.2025.11230957","is_oa":false,"landing_page_url":"https://doi.org/10.1109/waspaa66052.2025.11230957","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5060785253","display_name":"Tsun-An Hsieh","orcid":null},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Tsun-An Hsieh","raw_affiliation_strings":["University of Illinois Urbana-Champaign, Siebel School of Computing and Data Science,Urbana,IL,USA,61801"],"affiliations":[{"raw_affiliation_string":"University of Illinois Urbana-Champaign, Siebel School of Computing and Data Science,Urbana,IL,USA,61801","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5064582903","display_name":"Minje Kim","orcid":"https://orcid.org/0000-0003-3513-8328"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Minje Kim","raw_affiliation_strings":["University of Illinois Urbana-Champaign, Siebel School of Computing and Data Science,Urbana,IL,USA,61801"],"affiliations":[{"raw_affiliation_string":"University of Illinois Urbana-Champaign, Siebel School of Computing and Data Science,Urbana,IL,USA,61801","institution_ids":["https://openalex.org/I157725225"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5060785253"],"corresponding_institution_ids":["https://openalex.org/I157725225"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.19492888,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.6902999877929688,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.6902999877929688,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.27889999747276306,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.007400000002235174,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.5467000007629395},{"id":"https://openalex.org/keywords/personalization","display_name":"Personalization","score":0.5403000116348267},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.4472000002861023},{"id":"https://openalex.org/keywords/mixing","display_name":"Mixing (physics)","score":0.4250999987125397},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.32589998841285706}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7150999903678894},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.633899986743927},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.5467000007629395},{"id":"https://openalex.org/C183003079","wikidata":"https://www.wikidata.org/wiki/Q1000371","display_name":"Personalization","level":2,"score":0.5403000116348267},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.4472000002861023},{"id":"https://openalex.org/C138777275","wikidata":"https://www.wikidata.org/wiki/Q6884054","display_name":"Mixing (physics)","level":2,"score":0.4250999987125397},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4083000123500824},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.32589998841285706},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.2897000014781952},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.28870001435279846},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.28780001401901245},{"id":"https://openalex.org/C61224824","wikidata":"https://www.wikidata.org/wiki/Q2260434","display_name":"Mixture model","level":2,"score":0.2743000090122223}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/waspaa66052.2025.11230957","is_oa":false,"landing_page_url":"https://doi.org/10.1109/waspaa66052.2025.11230957","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W1597121597","https://openalex.org/W1983108229","https://openalex.org/W2194775991","https://openalex.org/W2593116425","https://openalex.org/W2696967604","https://openalex.org/W2891833136","https://openalex.org/W2938874599","https://openalex.org/W2948116732","https://openalex.org/W2951130829","https://openalex.org/W2964058413","https://openalex.org/W3015636705","https://openalex.org/W3016361963","https://openalex.org/W3097653961","https://openalex.org/W3162424266","https://openalex.org/W3162538144","https://openalex.org/W3163464523","https://openalex.org/W3164605550","https://openalex.org/W3185109982","https://openalex.org/W3206706278","https://openalex.org/W4205483086","https://openalex.org/W4285258106","https://openalex.org/W4293363567","https://openalex.org/W4379797396","https://openalex.org/W4384080510","https://openalex.org/W4385822555","https://openalex.org/W4386764866","https://openalex.org/W4391876406","https://openalex.org/W4392903251","https://openalex.org/W4401633910","https://openalex.org/W4402112296","https://openalex.org/W4402112506","https://openalex.org/W4402115964","https://openalex.org/W4408354764"],"related_works":[],"abstract_inverted_index":{"State-of-the-art":[0],"target":[1,137],"speaker":[2,138,169],"extraction":[3],"(TSE)":[4],"systems":[5],"are":[6,57],"typically":[7],"designed":[8],"to":[9,11,37,94,133,161],"generalize":[10],"any":[12],"given":[13,168],"mixing":[14],"environment,":[15],"necessitating":[16],"a":[17,20,25,32,52,62,85,99,108,113,124,167,193],"model":[18,116,132],"with":[19,69],"large":[21,125],"enough":[22],"capacity":[23],"as":[24,189],"generalist.":[26],"Personalized":[27],"speech":[28,101,164],"enhancement":[29],"could":[30],"be":[31],"specialized":[33,181],"solution":[34],"that":[35,151],"adapts":[36],"single-user":[38],"scenarios,":[39],"but":[40],"it":[41],"overlooks":[42],"the":[43,70,80,95,119,130,136,140,155,162,177],"practical":[44],"need":[45],"for":[46,61,183],"customization":[47],"in":[48,84],"cases":[49],"where":[50,79,112],"only":[51],"small":[53],"number":[54],"of":[55,77,88,98,166,179],"talkers":[56],"involved,":[58],"e.g.,":[59],"TSE":[60,81,191],"specific":[63],"family.":[64],"We":[65],"address":[66],"this":[67,104],"gap":[68],"proposed":[71,173],"concept,":[72],"talker":[73,142],"group-informed":[74],"familiarization":[75],"(TGIF)":[76],"TSE,":[78],"system":[82],"specializes":[83],"particular":[86,141],"group":[87,143],"users,":[89],"which":[90],"is":[91],"challenging":[92],"due":[93],"inherent":[96],"absence":[97],"clean":[100],"target.":[102],"To":[103],"end,":[105],"we":[106],"employ":[107],"knowledge":[109],"distillation":[110],"approach,":[111],"group-specific":[114],"student":[115,131],"learns":[117],"from":[118,139],"pseudo-clean":[120],"targets":[121],"generated":[122],"by":[123,159],"teacher":[126],"model.":[127],"This":[128],"tailors":[129],"effectively":[134],"extract":[135],"while":[144],"maintaining":[145],"computational":[146],"efficiency.":[147],"Experimental":[148],"results":[149],"demonstrate":[150],"our":[152],"approach":[153],"outperforms":[154],"baseline":[156],"generic":[157],"models":[158],"adapting":[160],"unique":[163],"characteristics":[165],"group.":[170],"Our":[171],"newly":[172],"TGIF":[174],"concept":[175],"underscores":[176],"potential":[178],"developing":[180],"solutions":[182],"diverse":[184],"and":[185],"real-world":[186],"applications,":[187],"such":[188],"on-device":[190],"on":[192],"family-owned":[194],"device.":[195]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-14T00:00:00"}
