{"id":"https://openalex.org/W4295365513","doi":"https://doi.org/10.21437/interspeech.2022-10825","title":"Overlapped speech and gender detection with WavLM pre-trained features","display_name":"Overlapped speech and gender detection with WavLM pre-trained features","publication_year":2022,"publication_date":"2022-09-16","ids":{"openalex":"https://openalex.org/W4295365513","doi":"https://doi.org/10.21437/interspeech.2022-10825"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2022-10825","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-10825","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2209.04167","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5044846779","display_name":"Martin Lebourdais","orcid":"https://orcid.org/0000-0001-7150-0588"},"institutions":[{"id":"https://openalex.org/I4210108471","display_name":"Le Mans Universit\u00e9","ror":"https://ror.org/01mtcc283","country_code":"FR","type":"education","lineage":["https://openalex.org/I4210108471"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Martin Lebourdais","raw_affiliation_strings":["LIUM - Laboratoire d'Informatique de l'Universit\u00e9 du Mans (Avenue Laennec 72085 Le Mans cedex 9 - France)"],"affiliations":[{"raw_affiliation_string":"LIUM - Laboratoire d'Informatique de l'Universit\u00e9 du Mans (Avenue Laennec 72085 Le Mans cedex 9 - France)","institution_ids":["https://openalex.org/I4210108471"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028709353","display_name":"Marie Tahon","orcid":"https://orcid.org/0000-0002-6782-0332"},"institutions":[{"id":"https://openalex.org/I4210108471","display_name":"Le Mans Universit\u00e9","ror":"https://ror.org/01mtcc283","country_code":"FR","type":"education","lineage":["https://openalex.org/I4210108471"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Marie Tahon","raw_affiliation_strings":["LIUM - Laboratoire d'Informatique de l'Universit\u00e9 du Mans (Avenue Laennec 72085 Le Mans cedex 9 - France)"],"affiliations":[{"raw_affiliation_string":"LIUM - Laboratoire d'Informatique de l'Universit\u00e9 du Mans (Avenue Laennec 72085 Le Mans cedex 9 - France)","institution_ids":["https://openalex.org/I4210108471"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025212823","display_name":"Antoine Laurent","orcid":"https://orcid.org/0000-0002-2653-1008"},"institutions":[{"id":"https://openalex.org/I4210108471","display_name":"Le Mans Universit\u00e9","ror":"https://ror.org/01mtcc283","country_code":"FR","type":"education","lineage":["https://openalex.org/I4210108471"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Antoine LAURENT","raw_affiliation_strings":["LIUM - Laboratoire d'Informatique de l'Universit\u00e9 du Mans (Avenue Laennec 72085 Le Mans cedex 9 - France)"],"affiliations":[{"raw_affiliation_string":"LIUM - Laboratoire d'Informatique de l'Universit\u00e9 du Mans (Avenue Laennec 72085 Le Mans cedex 9 - France)","institution_ids":["https://openalex.org/I4210108471"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5075235121","display_name":"Sylvain Meignier","orcid":"https://orcid.org/0000-0001-7687-073X"},"institutions":[{"id":"https://openalex.org/I4210108471","display_name":"Le Mans Universit\u00e9","ror":"https://ror.org/01mtcc283","country_code":"FR","type":"education","lineage":["https://openalex.org/I4210108471"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Sylvain Meignier","raw_affiliation_strings":["LIUM - Laboratoire d'Informatique de l'Universit\u00e9 du Mans (Avenue Laennec 72085 Le Mans cedex 9 - France)"],"affiliations":[{"raw_affiliation_string":"LIUM - Laboratoire d'Informatique de l'Universit\u00e9 du Mans (Avenue Laennec 72085 Le Mans cedex 9 - France)","institution_ids":["https://openalex.org/I4210108471"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5044846779"],"corresponding_institution_ids":["https://openalex.org/I4210108471"],"apc_list":null,"apc_paid":null,"fwci":1.1046,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.79778831,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"5010","last_page":"5014"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9088000059127808,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9088000059127808,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9035999774932861,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7906610369682312},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.7202481627464294},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.674444854259491},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.639318585395813},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5771446228027344},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5652725100517273},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5036477446556091},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.45158371329307556},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.41968071460723877},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4101948142051697},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.3577616810798645}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7906610369682312},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.7202481627464294},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.674444854259491},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.639318585395813},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5771446228027344},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5652725100517273},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5036477446556091},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45158371329307556},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.41968071460723877},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4101948142051697},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.3577616810798645},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.21437/interspeech.2022-10825","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2022-10825","pdf_url":null,"source":{"id":"https://openalex.org/S4363604309","display_name":"Interspeech 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2022","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2209.04167","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2209.04167","pdf_url":"https://arxiv.org/pdf/2209.04167","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2209.04167","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2209.04167","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2209.04167","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2209.04167","pdf_url":"https://arxiv.org/pdf/2209.04167","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/5","display_name":"Gender equality","score":0.7699999809265137}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W262275730","https://openalex.org/W2129456397","https://openalex.org/W2168966732","https://openalex.org/W2402428045","https://openalex.org/W2407024733","https://openalex.org/W2591682631","https://openalex.org/W2792764867","https://openalex.org/W2800448574","https://openalex.org/W2890964092","https://openalex.org/W2896457183","https://openalex.org/W2964052309","https://openalex.org/W2968071640","https://openalex.org/W2989863749","https://openalex.org/W3015780472","https://openalex.org/W3015783745","https://openalex.org/W3036601975","https://openalex.org/W3081869477","https://openalex.org/W3157070662","https://openalex.org/W3169320628","https://openalex.org/W3196595845","https://openalex.org/W3197580070","https://openalex.org/W3198827946","https://openalex.org/W3209984917","https://openalex.org/W4285618512"],"related_works":["https://openalex.org/W3135230428","https://openalex.org/W2904739811","https://openalex.org/W2152158029","https://openalex.org/W2012540220","https://openalex.org/W2559837139","https://openalex.org/W1151175420","https://openalex.org/W2166690696","https://openalex.org/W2131711534","https://openalex.org/W2559040841","https://openalex.org/W114661351"],"abstract_inverted_index":{"This":[0,172],"article":[1],"focuses":[2],"on":[3,68,144,154],"overlapped":[4,78],"speech":[5,36,73,79],"and":[6,16,42,82,166,188],"gender":[7,84,110,156],"detection":[8,80,85],"in":[9,18,190],"order":[10],"to":[11,32,39,43,57,75],"study":[12],"interactions":[13],"between":[14,186],"women":[15,187],"men":[17,189],"French":[19,161,191],"audiovisual":[20],"media":[21],"(Gender":[22],"Equality":[23],"Monitoring":[24],"project).":[25],"In":[26,88],"this":[27,89],"application":[28,119],"context,":[29],"we":[30,91],"need":[31],"automatically":[33],"segment":[34],"the":[35,52,63,105,117,160,182],"signal":[37],"according":[38],"speakers":[40,49],"gender,":[41],"identify":[44],"when":[45],"at":[46,51],"least":[47],"two":[48,93],"speak":[50],"same":[53],"time.":[54],"We":[55],"propose":[56],"use":[58,92],"WavLM":[59,132,152],"model":[60],"which":[61,100,137],"has":[62],"advantage":[64],"of":[65,72,159,170,184],"being":[66],"pre-trained":[67,133],"a":[69,83,126,139,155],"huge":[70],"amount":[71],"data,":[74,165],"build":[76],"an":[77,168],"(OSD)":[81],"(GD)":[86],"systems.":[87],"study,":[90],"different":[94],"corpora.":[95],"The":[96,112],"DIHARD":[97],"III":[98],"corpus":[99,114],"is":[101,125,149],"well":[102],"adapted":[103],"for":[104,177],"OSD":[106,123],"task":[107],"but":[108],"lack":[109],"information.":[111],"ALLIES":[113,164],"fits":[115],"with":[116,131,151],"project":[118],"context.":[120],"Our":[121],"best":[122],"system":[124],"Temporal":[127],"Convolutional":[128],"Network":[129],"(TCN)":[130],"features":[134],"as":[135],"input,":[136],"reaches":[138],"new":[140,175],"state-of-the-art":[141],"F1-score":[142],"performance":[143],"DIHARD.":[145],"A":[146],"neural":[147],"GD":[148],"trained":[150],"inputs":[153],"balanced":[157],"subset":[158],"broadcast":[162],"news":[163],"obtains":[167],"accuracy":[169],"97.9%.":[171],"work":[173],"opens":[174],"perspectives":[176],"human":[178],"science":[179],"researchers":[180],"regarding":[181],"differences":[183],"representation":[185],"media.":[192]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2022-09-13T00:00:00"}
