{"id":"https://openalex.org/W7148434422","doi":"https://doi.org/10.1109/asru65441.2025.11434721","title":"State-of-the-art Embeddings with Video-free Segmentation of the Source VoxCeleb Data","display_name":"State-of-the-art Embeddings with Video-free Segmentation of the Source VoxCeleb Data","publication_year":2025,"publication_date":"2025-12-06","ids":{"openalex":"https://openalex.org/W7148434422","doi":"https://doi.org/10.1109/asru65441.2025.11434721"},"language":null,"primary_location":{"id":"doi:10.1109/asru65441.2025.11434721","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru65441.2025.11434721","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5106528898","display_name":"Sara Barahona","orcid":"https://orcid.org/0009-0001-8519-0549"},"institutions":[{"id":"https://openalex.org/I904013037","display_name":"Universidad Europea de Madrid","ror":"https://ror.org/04dp46240","country_code":"ES","type":"education","lineage":["https://openalex.org/I904013037"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Sara Barahona","raw_affiliation_strings":["Universidad Aut&#x00F3;noma de Madrid,AUDIAS Research Group,Madrid,Spain"],"affiliations":[{"raw_affiliation_string":"Universidad Aut&#x00F3;noma de Madrid,AUDIAS Research Group,Madrid,Spain","institution_ids":["https://openalex.org/I904013037"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132827043","display_name":"Ladislav Mo\u0161nery","orcid":null},"institutions":[{"id":"https://openalex.org/I60587646","display_name":"Brno University of Technology","ror":"https://ror.org/03613d656","country_code":"CZ","type":"education","lineage":["https://openalex.org/I60587646"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Ladislav Mo\u0161nery","raw_affiliation_strings":["Brno University of Technology,Faculty of Information Technology, Speech@FIT,Czechia"],"affiliations":[{"raw_affiliation_string":"Brno University of Technology,Faculty of Information Technology, Speech@FIT,Czechia","institution_ids":["https://openalex.org/I60587646"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132791636","display_name":"Themos Stafylakisz","orcid":null},"institutions":[{"id":"https://openalex.org/I4210156054","display_name":"Athena Research and Innovation Center In Information Communication & Knowledge Technologies","ror":"https://ror.org/0576by029","country_code":"GR","type":"facility","lineage":["https://openalex.org/I4210156054"]},{"id":"https://openalex.org/I73142707","display_name":"Athens University of Economics and Business","ror":"https://ror.org/03s262162","country_code":"GR","type":"education","lineage":["https://openalex.org/I73142707"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Themos Stafylakisz","raw_affiliation_strings":["Athens University of Economics and Business | Omilia | Archimedes/Athena R.C,Greece"],"affiliations":[{"raw_affiliation_string":"Athens University of Economics and Business | Omilia | Archimedes/Athena R.C,Greece","institution_ids":["https://openalex.org/I4210156054","https://openalex.org/I73142707"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132821775","display_name":"Old\u0159ich Plchoty","orcid":null},"institutions":[{"id":"https://openalex.org/I60587646","display_name":"Brno University of Technology","ror":"https://ror.org/03613d656","country_code":"CZ","type":"education","lineage":["https://openalex.org/I60587646"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Old\u0159ich Plchoty","raw_affiliation_strings":["Brno University of Technology,Faculty of Information Technology, Speech@FIT,Czechia"],"affiliations":[{"raw_affiliation_string":"Brno University of Technology,Faculty of Information Technology, Speech@FIT,Czechia","institution_ids":["https://openalex.org/I60587646"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132816804","display_name":"Junyi Pengy","orcid":null},"institutions":[{"id":"https://openalex.org/I60587646","display_name":"Brno University of Technology","ror":"https://ror.org/03613d656","country_code":"CZ","type":"education","lineage":["https://openalex.org/I60587646"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Junyi Pengy","raw_affiliation_strings":["Brno University of Technology,Faculty of Information Technology, Speech@FIT,Czechia"],"affiliations":[{"raw_affiliation_string":"Brno University of Technology,Faculty of Information Technology, Speech@FIT,Czechia","institution_ids":["https://openalex.org/I60587646"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132807227","display_name":"Luk\u00e1\u0161 Burgety","orcid":null},"institutions":[{"id":"https://openalex.org/I60587646","display_name":"Brno University of Technology","ror":"https://ror.org/03613d656","country_code":"CZ","type":"education","lineage":["https://openalex.org/I60587646"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Luk\u00e1\u0161 Burgety","raw_affiliation_strings":["Brno University of Technology,Faculty of Information Technology, Speech@FIT,Czechia"],"affiliations":[{"raw_affiliation_string":"Brno University of Technology,Faculty of Information Technology, Speech@FIT,Czechia","institution_ids":["https://openalex.org/I60587646"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5132804267","display_name":"Jan \u010cernocky\u00fd","orcid":null},"institutions":[{"id":"https://openalex.org/I60587646","display_name":"Brno University of Technology","ror":"https://ror.org/03613d656","country_code":"CZ","type":"education","lineage":["https://openalex.org/I60587646"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Jan \u010cernocky\u00fd","raw_affiliation_strings":["Brno University of Technology,Faculty of Information Technology, Speech@FIT,Czechia"],"affiliations":[{"raw_affiliation_string":"Brno University of Technology,Faculty of Information Technology, Speech@FIT,Czechia","institution_ids":["https://openalex.org/I60587646"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5106528898"],"corresponding_institution_ids":["https://openalex.org/I904013037"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.87353988,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.47290000319480896,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.47290000319480896,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.4050000011920929,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.03200000151991844,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.7752000093460083},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.4805000126361847},{"id":"https://openalex.org/keywords/timestamp","display_name":"Timestamp","score":0.4668999910354614},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.45489999651908875},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4077000021934509},{"id":"https://openalex.org/keywords/hyperparameter","display_name":"Hyperparameter","score":0.4050999879837036},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.3855000138282776}],"concepts":[{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.7752000093460083},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7350999712944031},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.555400013923645},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4871000051498413},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.4805000126361847},{"id":"https://openalex.org/C113954288","wikidata":"https://www.wikidata.org/wiki/Q186885","display_name":"Timestamp","level":2,"score":0.4668999910354614},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.45489999651908875},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4077000021934509},{"id":"https://openalex.org/C8642999","wikidata":"https://www.wikidata.org/wiki/Q4171168","display_name":"Hyperparameter","level":2,"score":0.4050999879837036},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.3855000138282776},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.3564000129699707},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.34139999747276306},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.2874999940395355},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.27469998598098755},{"id":"https://openalex.org/C207030507","wikidata":"https://www.wikidata.org/wiki/Q2266173","display_name":"Speech segmentation","level":3,"score":0.27129998803138733},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.26969999074935913},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.258899986743927}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru65441.2025.11434721","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru65441.2025.11434721","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320316083","display_name":"Tencent","ror":"https://ror.org/00hhjss72"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W2110119381","https://openalex.org/W2159591770","https://openalex.org/W2302255633","https://openalex.org/W2726515241","https://openalex.org/W2730845691","https://openalex.org/W2808631503","https://openalex.org/W2889233255","https://openalex.org/W2969985801","https://openalex.org/W3024869864","https://openalex.org/W3157070662","https://openalex.org/W3159481202","https://openalex.org/W3197580070","https://openalex.org/W3209059054","https://openalex.org/W3209984917","https://openalex.org/W4296069289","https://openalex.org/W4319862271","https://openalex.org/W4372340947","https://openalex.org/W4385822353","https://openalex.org/W4385822356","https://openalex.org/W4388579618","https://openalex.org/W4400617062","https://openalex.org/W4402112366"],"related_works":[],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3,19],"refine":[4],"and":[5,30,52,58,110,131],"validate":[6],"our":[7,113],"method":[8,64,114],"for":[9,107],"training":[10,73,126],"speaker":[11,69,108],"embedding":[12,53,129],"extractors":[13,54,75,130],"using":[14],"weak":[15],"annotations.":[16],"More":[17],"specifically,":[18],"use":[20,117],"only":[21],"the":[22,26,31,34,38,46,63,74,82,98,105,116],"audio":[23],"stream":[24],"of":[25,33,118,127],"source":[27],"VoxCeleb":[28,83],"videos":[29],"names":[32],"celebrities":[35],"without":[36],"knowing":[37],"time":[39],"intervals":[40],"in":[41,45,68,76],"which":[42,100],"they":[43],"appear":[44],"recording.":[47],"We":[48,60,85],"experiment":[49],"with":[50,72],"hyperparameters":[51],"based":[55],"on":[56,81],"ResNet":[57],"WavLM.":[59],"show":[61],"that":[62],"achieves":[65],"state-of-the-art":[66,128],"results":[67],"verification,":[70],"comparable":[71],"a":[77,133],"standard":[78],"supervised":[79],"way":[80],"dataset.":[84],"also":[86],"extend":[87],"it":[88],"by":[89],"considering":[90],"segments":[91],"be-longing":[92],"to":[93,136],"unknown":[94],"speakers":[95],"appearing":[96],"alongside":[97],"celebrities,":[99],"are":[101],"typically":[102],"discarded.":[103],"Removing":[104],"need":[106],"timestamps":[109],"multimodal":[111],"alignment,":[112],"unlocks":[115],"large-scale":[119],"weakly":[120],"labeled":[121],"speech":[122],"data,":[123],"enabling":[124],"direct":[125],"offering":[132],"visual-free":[134],"alternative":[135],"VoxCeleb-style":[137],"dataset":[138],"creation.":[139]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2026-04-03T00:00:00"}
