{"id":"https://openalex.org/W4224007376","doi":"https://doi.org/10.21437/interspeech.2022-96","title":"Speaker-Aware Mixture of Mixtures Training for Weakly Supervised Speaker Extraction","display_name":"Speaker-Aware Mixture of Mixtures Training for Weakly Supervised Speaker Extraction","publication_year":2022,"publication_date":"2022-09-16","ids":{"openalex":"https://openalex.org/W4224007376","doi":"https://doi.org/10.21437/interspeech.2022-96"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2022-96","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-96","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100962843","display_name":"Zifeng Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zifeng Zhao","raw_affiliation_strings":["ADSPLAB, School of ECE, Peking University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"ADSPLAB, School of ECE, Peking University, Shenzhen, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038895203","display_name":"Rongzhi Gu","orcid":"https://orcid.org/0000-0003-1861-9170"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rongzhi Gu","raw_affiliation_strings":["ADSPLAB, School of ECE, Peking University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"ADSPLAB, School of ECE, Peking University, Shenzhen, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043555011","display_name":"Dongchao Yang","orcid":"https://orcid.org/0000-0002-8905-224X"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dongchao Yang","raw_affiliation_strings":["ADSPLAB, School of ECE, Peking University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"ADSPLAB, School of ECE, Peking University, Shenzhen, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068192693","display_name":"Jinchuan Tian","orcid":"https://orcid.org/0000-0002-2129-471X"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinchuan Tian","raw_affiliation_strings":["ADSPLAB, School of ECE, Peking University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"ADSPLAB, School of ECE, Peking University, Shenzhen, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5002795838","display_name":"Yuexian Zou","orcid":"https://orcid.org/0000-0001-9999-6140"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuexian Zou","raw_affiliation_strings":["ADSPLAB, School of ECE, Peking University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"ADSPLAB, School of ECE, Peking University, Shenzhen, China","institution_ids":["https://openalex.org/I20231570"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100962843"],"corresponding_institution_ids":["https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":0.2458,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.34218009,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"5318","last_page":"5322"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.765606164932251},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.7555028200149536},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7000047564506531},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.5895869135856628},{"id":"https://openalex.org/keywords/speaker-verification","display_name":"Speaker verification","score":0.5488489270210266},{"id":"https://openalex.org/keywords/extraction","display_name":"Extraction (chemistry)","score":0.4801853895187378},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.4728073179721832},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4700899124145508},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4240923821926117},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.08224323391914368},{"id":"https://openalex.org/keywords/chromatography","display_name":"Chromatography","score":0.06477609276771545}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.765606164932251},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.7555028200149536},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7000047564506531},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.5895869135856628},{"id":"https://openalex.org/C2982762665","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker verification","level":3,"score":0.5488489270210266},{"id":"https://openalex.org/C4725764","wikidata":"https://www.wikidata.org/wiki/Q844704","display_name":"Extraction (chemistry)","level":2,"score":0.4801853895187378},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.4728073179721832},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4700899124145508},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4240923821926117},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.08224323391914368},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.06477609276771545},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2022-96","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-96","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.7300000190734863,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320329791","display_name":"Shenzhen Fundamental Research Program","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W2221409856","https://openalex.org/W2460742184","https://openalex.org/W2734774145","https://openalex.org/W2891833136","https://openalex.org/W2939771864","https://openalex.org/W2952218014","https://openalex.org/W2962866211","https://openalex.org/W2963189033","https://openalex.org/W2963242190","https://openalex.org/W2964058413","https://openalex.org/W2972541922","https://openalex.org/W2972767900","https://openalex.org/W2973062255","https://openalex.org/W2988200020","https://openalex.org/W3015636705","https://openalex.org/W3016038135","https://openalex.org/W3027008958","https://openalex.org/W3093839391","https://openalex.org/W3103434036","https://openalex.org/W3197381252","https://openalex.org/W3206116612","https://openalex.org/W4287119748","https://openalex.org/W4287632494"],"related_works":["https://openalex.org/W2206035908","https://openalex.org/W66821593","https://openalex.org/W1521299571","https://openalex.org/W4247736853","https://openalex.org/W2162158162","https://openalex.org/W4235705411","https://openalex.org/W1493012537","https://openalex.org/W1999004162","https://openalex.org/W2144470400","https://openalex.org/W2911612049"],"abstract_inverted_index":{"Dominant":[0],"researches":[1],"adopt":[2],"supervised":[3,157],"training":[4,30,50],"for":[5],"speaker":[6,36,54],"extraction,":[7],"while":[8],"the":[9,33,49,57,88,95,100,109,135],"scarcity":[10],"of":[11,28,35,51,122],"ideally":[12],"clean":[13,123,145],"corpus":[14],"and":[15,43,76],"channel":[16],"mismatch":[17],"problem":[18],"are":[19],"rarely":[20],"considered.To":[21],"this":[22],"end,":[23],"we":[24],"propose":[25],"speaker-aware":[26,65],"mixture":[27],"mixtures":[29,66],"(SAMoM),":[31],"utilizing":[32],"consistency":[34],"identity":[37,110],"among":[38],"target":[39,44,83],"source,":[40],"enrollment":[41,77,81],"utterance":[42],"estimate":[45],"to":[46,143],"weakly":[47],"supervise":[48],"a":[52,104,115,119,150,160],"deep":[53],"extractor.In":[55],"SAMoM,":[56],"input":[58,89],"is":[59,85],"constructed":[60],"by":[61,80,91],"mixing":[62],"up":[63],"different":[64],"(SAMs),":[67],"each":[68],"contains":[69],"multiple":[70],"speakers":[71],"with":[72,108,118],"their":[73],"identities":[74],"known":[75],"utterances":[78],"available.Informed":[79],"utterances,":[82],"speech":[84],"extracted":[86],"from":[87],"one":[90],"one,":[92],"such":[93],"that":[94,134],"estimated":[96],"targets":[97],"can":[98],"approximate":[99],"original":[101],"SAMs":[102],"after":[103],"remix":[105],"in":[106,114,127,159],"accordance":[107],"consistency.Moreover,":[111],"using":[112],"SAMoM":[113],"semi-supervised":[116],"setting":[117],"certain":[120],"amount":[121],"sources":[124,146],"enables":[125],"application":[126],"noisy":[128],"scenarios.Extensive":[129],"experiments":[130],"on":[131,163],"Libri2Mix":[132],"show":[133],"proposed":[136],"method":[137],"achieves":[138],"promising":[139],"results":[140],"without":[141],"access":[142],"any":[144],"(11.06dBSI-SDRi)":[147],"1":[148],".With":[149],"domain":[151],"adaptation,":[152],"our":[153],"approach":[154],"even":[155],"outperformed":[156],"framework":[158],"cross-domain":[161],"evaluation":[162],"AISHELL-1.":[164]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2023,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
