{"id":"https://openalex.org/W4387870312","doi":"https://doi.org/10.1109/mlsp55844.2023.10285923","title":"Improved Vocal Effort Transfer Vector Estimation For Vocal Effort-Robust Speaker Verification","display_name":"Improved Vocal Effort Transfer Vector Estimation For Vocal Effort-Robust Speaker Verification","publication_year":2023,"publication_date":"2023-09-17","ids":{"openalex":"https://openalex.org/W4387870312","doi":"https://doi.org/10.1109/mlsp55844.2023.10285923"},"language":"en","primary_location":{"id":"doi:10.1109/mlsp55844.2023.10285923","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/mlsp55844.2023.10285923","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE 33rd International Workshop on Machine Learning for Signal Processing (MLSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2305.02147","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048592470","display_name":"Iv\u00e1n L\u00f3pez\u2010Espejo","orcid":"https://orcid.org/0000-0001-8634-7897"},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]},{"id":"https://openalex.org/I891191580","display_name":"Aalborg University","ror":"https://ror.org/04m5j1k67","country_code":"DK","type":"education","lineage":["https://openalex.org/I891191580"]}],"countries":["DK","US"],"is_corresponding":true,"raw_author_name":"Iv\u00e1n L\u00f3pez-Espejo","raw_affiliation_strings":["Aalborg University,Department of Electronic Systems,Denmark","Center for Robust Speech Systems (CRSS), The University of Texas at Dallas, USA","Department of Electronic Systems, Aalborg University, Denmark"],"affiliations":[{"raw_affiliation_string":"Aalborg University,Department of Electronic Systems,Denmark","institution_ids":["https://openalex.org/I891191580"]},{"raw_affiliation_string":"Center for Robust Speech Systems (CRSS), The University of Texas at Dallas, USA","institution_ids":["https://openalex.org/I162577319"]},{"raw_affiliation_string":"Department of Electronic Systems, Aalborg University, Denmark","institution_ids":["https://openalex.org/I891191580"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057111078","display_name":"Santi Prieto","orcid":null},"institutions":[{"id":"https://openalex.org/I2800821610","display_name":"Government of Spain","ror":"https://ror.org/038jjxj40","country_code":"ES","type":"government","lineage":["https://openalex.org/I2800821610"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Santi Prieto","raw_affiliation_strings":["VeriDas | das-Nano,Navarre,Spain","VeriDas | das-Nano, Navarre, Spain"],"affiliations":[{"raw_affiliation_string":"VeriDas | das-Nano,Navarre,Spain","institution_ids":["https://openalex.org/I2800821610"]},{"raw_affiliation_string":"VeriDas | das-Nano, Navarre, Spain","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101903408","display_name":"Alfonso Ortega","orcid":"https://orcid.org/0000-0002-3886-7748"},"institutions":[{"id":"https://openalex.org/I255234318","display_name":"Universidad de Zaragoza","ror":"https://ror.org/012a91z28","country_code":"ES","type":"education","lineage":["https://openalex.org/I255234318"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Alfonso Ortega","raw_affiliation_strings":["ViVoLab, AragArag&#x00F3;nn Institute for Engineering Research (I3A) University of Zaragoza,Spain"],"affiliations":[{"raw_affiliation_string":"ViVoLab, AragArag&#x00F3;nn Institute for Engineering Research (I3A) University of Zaragoza,Spain","institution_ids":["https://openalex.org/I255234318"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5036493563","display_name":"Eduardo Lleida","orcid":"https://orcid.org/0000-0001-9137-4013"},"institutions":[{"id":"https://openalex.org/I255234318","display_name":"Universidad de Zaragoza","ror":"https://ror.org/012a91z28","country_code":"ES","type":"education","lineage":["https://openalex.org/I255234318"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Eduardo Lleida","raw_affiliation_strings":["ViVoLab, AragArag&#x00F3;nn Institute for Engineering Research (I3A) University of Zaragoza,Spain"],"affiliations":[{"raw_affiliation_string":"ViVoLab, AragArag&#x00F3;nn Institute for Engineering Research (I3A) University of Zaragoza,Spain","institution_ids":["https://openalex.org/I255234318"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5048592470"],"corresponding_institution_ids":["https://openalex.org/I162577319","https://openalex.org/I891191580"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.14184874,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/estimator","display_name":"Estimator","score":0.6959104537963867},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6557744741439819},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6374192237854004},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.554405689239502},{"id":"https://openalex.org/keywords/minimum-mean-square-error","display_name":"Minimum mean square error","score":0.5251278877258301},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.45530077815055847},{"id":"https://openalex.org/keywords/compensation","display_name":"Compensation (psychology)","score":0.4507260322570801},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.44945091009140015},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3325080871582031},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.29182329773902893},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2413434088230133},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.13044807314872742}],"concepts":[{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.6959104537963867},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6557744741439819},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6374192237854004},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.554405689239502},{"id":"https://openalex.org/C90652560","wikidata":"https://www.wikidata.org/wiki/Q11091747","display_name":"Minimum mean square error","level":3,"score":0.5251278877258301},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.45530077815055847},{"id":"https://openalex.org/C2780023022","wikidata":"https://www.wikidata.org/wiki/Q1338171","display_name":"Compensation (psychology)","level":2,"score":0.4507260322570801},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.44945091009140015},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3325080871582031},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.29182329773902893},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2413434088230133},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.13044807314872742},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C11171543","wikidata":"https://www.wikidata.org/wiki/Q41630","display_name":"Psychoanalysis","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/mlsp55844.2023.10285923","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/mlsp55844.2023.10285923","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE 33rd International Workshop on Machine Learning for Signal Processing (MLSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.atira.dk:publications/26705d0e-fc0a-457c-b1e3-fd93185b88d5","is_oa":true,"landing_page_url":"https://vbn.aau.dk/da/publications/26705d0e-fc0a-457c-b1e3-fd93185b88d5","pdf_url":"https://arxiv.org/pdf/2305.02147","source":{"id":"https://openalex.org/S4306401731","display_name":"VBN Forskningsportal (Aalborg Universitet)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I891191580","host_organization_name":"Aalborg University","host_organization_lineage":["https://openalex.org/I891191580"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Espejo, I L, Prieto-Calero, S, Ortega, A & Lleida, E 2023, Improved Vocal Effort Transfer Vector Estimation for Vocal Effort-Robust Speaker Verification. in D Comminiello & M Scarpiniti (eds), Proceedings of the 2023 IEEE 33rd International Workshop on Machine Learning for Signal Processing, MLSP 2023., 10285923, IEEE (Institute of Electrical and Electronics Engineers), IEEE Workshop on Machine Learning for Signal Processing, 2023 IEEE 33rd International Workshop on Machine Learning for Signal Processing (MLSP), Rom, Italy, 17/09/2023. https://doi.org/10.1109/MLSP55844.2023.10285923","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"pmh:oai:pure.atira.dk:publications/26705d0e-fc0a-457c-b1e3-fd93185b88d5","is_oa":true,"landing_page_url":"https://vbn.aau.dk/da/publications/26705d0e-fc0a-457c-b1e3-fd93185b88d5","pdf_url":"https://arxiv.org/pdf/2305.02147","source":{"id":"https://openalex.org/S4306401731","display_name":"VBN Forskningsportal (Aalborg Universitet)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I891191580","host_organization_name":"Aalborg University","host_organization_lineage":["https://openalex.org/I891191580"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Espejo, I L, Prieto-Calero, S, Ortega, A & Lleida, E 2023, Improved Vocal Effort Transfer Vector Estimation for Vocal Effort-Robust Speaker Verification. in D Comminiello & M Scarpiniti (eds), Proceedings of the 2023 IEEE 33rd International Workshop on Machine Learning for Signal Processing, MLSP 2023., 10285923, IEEE (Institute of Electrical and Electronics Engineers), IEEE Workshop on Machine Learning for Signal Processing, 2023 IEEE 33rd International Workshop on Machine Learning for Signal Processing (MLSP), Rom, Italy, 17/09/2023. https://doi.org/10.1109/MLSP55844.2023.10285923","raw_type":"info:eu-repo/semantics/publishedVersion"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.7799999713897705,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4387870312.pdf"},"referenced_works_count":18,"referenced_works":["https://openalex.org/W1585956913","https://openalex.org/W1963783625","https://openalex.org/W1997063331","https://openalex.org/W2075208738","https://openalex.org/W2099881084","https://openalex.org/W2148154194","https://openalex.org/W2166943505","https://openalex.org/W2808631503","https://openalex.org/W2969985801","https://openalex.org/W3024869864","https://openalex.org/W3094374485","https://openalex.org/W3094655774","https://openalex.org/W3206189675","https://openalex.org/W3209059054","https://openalex.org/W3209984917","https://openalex.org/W4220663496","https://openalex.org/W4301253858","https://openalex.org/W6684352069"],"related_works":["https://openalex.org/W2081900870","https://openalex.org/W2037549926","https://openalex.org/W2379084545","https://openalex.org/W2345479200","https://openalex.org/W2183306018","https://openalex.org/W2849310602","https://openalex.org/W3006008237","https://openalex.org/W2556771176","https://openalex.org/W2402949781","https://openalex.org/W2640905660"],"abstract_inverted_index":{"Despite":[0],"the":[1,48,52,103],"maturity":[2],"of":[3,51],"modern":[4],"speaker":[5,32,83],"verification":[6,84],"technology,":[7],"its":[8],"performance":[9],"still":[10],"significantly":[11],"degrades":[12],"when":[13,115],"facing":[14],"non-neutrally-phonated":[15],"(e.g.,":[16],"shouted":[17,117],"and":[18,57,61,109,118],"whispered)":[19],"speech.":[20],"To":[21],"address":[22],"this":[23,26],"issue,":[24],"in":[25,63],"paper,":[27],"we":[28],"propose":[29],"a":[30,38,64,81,87,98],"new":[31],"embedding":[33,59,100],"compensation":[34,101],"method":[35,46],"based":[36],"on":[37],"minimum":[39],"mean":[40],"square":[41],"error":[42,112],"(MMSE)":[43],"estimator.":[44],"This":[45],"models":[47],"joint":[49],"distribution":[50],"vocal":[53],"effort":[54],"transfer":[55],"vector":[56],"nonneutrally-phonated":[58],"spaces":[60],"operates":[62],"principal":[65],"component":[66],"analysis":[67],"domain":[68],"to":[69],"cope":[70],"with":[71,97],"non-neutrallyphonated":[72],"speech":[73,93],"data":[74],"scarcity.":[75],"Experiments":[76],"are":[77],"carried":[78],"out":[79],"using":[80],"cutting-edge":[82],"system":[85],"integrating":[86],"powerful":[88],"self-supervised":[89],"pre-trained":[90],"model":[91],"for":[92],"representation.":[94],"In":[95],"comparison":[96],"state-of-the-art":[99],"method,":[102],"proposed":[104],"MMSE":[105],"estimator":[106],"yields":[107],"superior":[108],"competitive":[110],"equal":[111],"rate":[113],"results":[114],"tackling":[116],"whispered":[119],"speech,":[120],"respectively.":[121]},"counts_by_year":[],"updated_date":"2026-03-11T14:59:36.786465","created_date":"2025-10-10T00:00:00"}
