{"id":"https://openalex.org/W3008104819","doi":"https://doi.org/10.1109/asru46091.2019.9004029","title":"Short Utterance Compensation in Speaker Verification via Cosine-Based Teacher-Student Learning of Speaker Embeddings","display_name":"Short Utterance Compensation in Speaker Verification via Cosine-Based Teacher-Student Learning of Speaker Embeddings","publication_year":2019,"publication_date":"2019-12-01","ids":{"openalex":"https://openalex.org/W3008104819","doi":"https://doi.org/10.1109/asru46091.2019.9004029","mag":"3008104819"},"language":"en","primary_location":{"id":"doi:10.1109/asru46091.2019.9004029","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru46091.2019.9004029","pdf_url":null,"source":{"id":"https://openalex.org/S4306498489","display_name":"2019 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5091834525","display_name":"Jee-weon Jung","orcid":"https://orcid.org/0000-0003-0505-2988"},"institutions":[{"id":"https://openalex.org/I124633538","display_name":"University of Seoul","ror":"https://ror.org/05en5nh73","country_code":"KR","type":"education","lineage":["https://openalex.org/I124633538"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Jee-Weon Jung","raw_affiliation_strings":["School of Computer Science, University of Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, University of Seoul, South Korea","institution_ids":["https://openalex.org/I124633538"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070613375","display_name":"Hee-Soo Heo","orcid":"https://orcid.org/0000-0003-1567-123X"},"institutions":[{"id":"https://openalex.org/I124633538","display_name":"University of Seoul","ror":"https://ror.org/05en5nh73","country_code":"KR","type":"education","lineage":["https://openalex.org/I124633538"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hee-Soo Heo","raw_affiliation_strings":["School of Computer Science, University of Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, University of Seoul, South Korea","institution_ids":["https://openalex.org/I124633538"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103211980","display_name":"Hye-jin Shim","orcid":null},"institutions":[{"id":"https://openalex.org/I124633538","display_name":"University of Seoul","ror":"https://ror.org/05en5nh73","country_code":"KR","type":"education","lineage":["https://openalex.org/I124633538"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Hye-Jin Shim","raw_affiliation_strings":["School of Computer Science, University of Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, University of Seoul, South Korea","institution_ids":["https://openalex.org/I124633538"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5030475312","display_name":"Ha-Jin Yu","orcid":"https://orcid.org/0000-0003-3657-0665"},"institutions":[{"id":"https://openalex.org/I124633538","display_name":"University of Seoul","ror":"https://ror.org/05en5nh73","country_code":"KR","type":"education","lineage":["https://openalex.org/I124633538"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Ha-Jin Yu","raw_affiliation_strings":["School of Computer Science, University of Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, University of Seoul, South Korea","institution_ids":["https://openalex.org/I124633538"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5091834525"],"corresponding_institution_ids":["https://openalex.org/I124633538"],"apc_list":null,"apc_paid":null,"fwci":3.3604,"has_fulltext":false,"cited_by_count":40,"citation_normalized_percentile":{"value":0.94081538,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"335","last_page":"341"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/utterance","display_name":"Utterance","score":0.8375095725059509},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7632137537002563},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6518155336380005},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.6017819046974182},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.5895165205001831},{"id":"https://openalex.org/keywords/divergence","display_name":"Divergence (linguistics)","score":0.5478441715240479},{"id":"https://openalex.org/keywords/speaker-verification","display_name":"Speaker verification","score":0.524962842464447},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5247752070426941},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.478786826133728},{"id":"https://openalex.org/keywords/trigonometric-functions","display_name":"Trigonometric functions","score":0.4639318585395813},{"id":"https://openalex.org/keywords/cosine-similarity","display_name":"Cosine similarity","score":0.44861119985580444},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.4470154047012329},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4328283369541168},{"id":"https://openalex.org/keywords/compensation","display_name":"Compensation (psychology)","score":0.4225162863731384},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4140920341014862},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.39384597539901733},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10442748665809631}],"concepts":[{"id":"https://openalex.org/C2775852435","wikidata":"https://www.wikidata.org/wiki/Q258403","display_name":"Utterance","level":2,"score":0.8375095725059509},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7632137537002563},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6518155336380005},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.6017819046974182},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.5895165205001831},{"id":"https://openalex.org/C207390915","wikidata":"https://www.wikidata.org/wiki/Q1230525","display_name":"Divergence (linguistics)","level":2,"score":0.5478441715240479},{"id":"https://openalex.org/C2982762665","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker verification","level":3,"score":0.524962842464447},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5247752070426941},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.478786826133728},{"id":"https://openalex.org/C178009071","wikidata":"https://www.wikidata.org/wiki/Q93344","display_name":"Trigonometric functions","level":2,"score":0.4639318585395813},{"id":"https://openalex.org/C2780762811","wikidata":"https://www.wikidata.org/wiki/Q1784941","display_name":"Cosine similarity","level":3,"score":0.44861119985580444},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.4470154047012329},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4328283369541168},{"id":"https://openalex.org/C2780023022","wikidata":"https://www.wikidata.org/wiki/Q1338171","display_name":"Compensation (psychology)","level":2,"score":0.4225162863731384},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4140920341014862},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.39384597539901733},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10442748665809631},{"id":"https://openalex.org/C11171543","wikidata":"https://www.wikidata.org/wiki/Q41630","display_name":"Psychoanalysis","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru46091.2019.9004029","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru46091.2019.9004029","pdf_url":null,"source":{"id":"https://openalex.org/S4306498489","display_name":"2019 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.7900000214576721}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":31,"referenced_works":["https://openalex.org/W67277430","https://openalex.org/W1510078097","https://openalex.org/W2071175130","https://openalex.org/W2150769028","https://openalex.org/W2187089797","https://openalex.org/W2194775991","https://openalex.org/W2271840356","https://openalex.org/W2395820172","https://openalex.org/W2402040300","https://openalex.org/W2402144811","https://openalex.org/W2403265407","https://openalex.org/W2607694392","https://openalex.org/W2612434969","https://openalex.org/W2726515241","https://openalex.org/W2747900149","https://openalex.org/W2770454110","https://openalex.org/W2802488037","https://openalex.org/W2889129739","https://openalex.org/W2889151164","https://openalex.org/W2889341949","https://openalex.org/W2890964092","https://openalex.org/W2901616798","https://openalex.org/W2953384591","https://openalex.org/W2963096935","https://openalex.org/W2963523217","https://openalex.org/W2963865582","https://openalex.org/W2972909277","https://openalex.org/W6602762607","https://openalex.org/W6712488624","https://openalex.org/W6712847557","https://openalex.org/W6713380023"],"related_works":["https://openalex.org/W126212742","https://openalex.org/W2206035908","https://openalex.org/W1948687848","https://openalex.org/W2953417386","https://openalex.org/W2162158162","https://openalex.org/W4235705411","https://openalex.org/W1521299571","https://openalex.org/W4247736853","https://openalex.org/W1493012537","https://openalex.org/W2144470400"],"abstract_inverted_index":{"The":[0,68,136,183],"short":[1,37,58],"duration":[2,38],"of":[3,9,18,39,71,115,125,153,159],"an":[4,27,46,130],"input":[5],"utterance":[6,59],"is":[7,75],"one":[8],"the":[10,16,62,72,78,81,85,100,104],"most":[11],"critical":[12],"threats":[13],"that":[14,33,83,148,170],"degrade":[15],"performance":[17],"speaker":[19,30,86,101,160,178],"verification":[20,31],"systems.":[21],"This":[22],"study":[23],"aimed":[24],"to":[25,57,76,192],"develop":[26],"integrated":[28],"text-independent":[29],"system":[32,74,97],"inputs":[34],"utterances":[35],"with":[36],"2":[40],"seconds":[41],"or":[42],"less.":[43],"We":[44],"propose":[45],"approach":[47,138],"using":[48,120,133,166],"a":[49,95,113,141,193],"teacher-student":[50,146],"learning":[51,147],"framework":[52],"for":[53,61,145],"this":[54],"goal,":[55],"applied":[56],"compensation":[60,79],"first":[63],"time":[64],"in":[65,89],"our":[66],"knowledge.":[67],"core":[69],"concept":[70],"proposed":[73,105,137,184],"conduct":[77],"throughout":[80],"network":[82],"extracts":[84,129],"embedding,":[87],"mainly":[88],"phonetic-level,":[90],"rather":[91],"than":[92],"compensating":[93],"via":[94],"separate":[96],"after":[98],"extracting":[99],"embedding.":[102],"In":[103],"architecture,":[106],"phonetic-level":[107,134],"features":[108],"where":[109],"each":[110],"feature":[111,132],"represents":[112],"segment":[114],"130":[116],"ms":[117],"are":[118],"extracted":[119],"convolutional":[121],"layers.":[122,162],"A":[123],"layer":[124],"gated":[126],"recurrent":[127],"units":[128],"utterance-level":[131],"features.":[135],"also":[139],"adopts":[140],"new":[142],"objective":[143],"function":[144],"considers":[149],"both":[150],"Kullback-Leibler":[151],"divergence":[152],"output":[154,177],"layers":[155],"and":[156,176],"cosine":[157],"distance":[158],"embeddings":[161,179],"Experiments":[163],"were":[164],"conducted":[165],"deep":[167],"neural":[168],"networks":[169],"take":[171],"raw":[172],"waveforms":[173],"as":[174],"input,":[175],"on":[180],"VoxCelebl":[181],"dataset.":[182],"model":[185],"showed":[186],"16.6":[187],"%":[188],"relative":[189],"improvement":[190],"compared":[191],"baseline":[194],"approach.":[195]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":8},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":9},{"year":2020,"cited_by_count":11},{"year":2019,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
