{"id":"https://openalex.org/W4319862658","doi":"https://doi.org/10.1109/slt54892.2023.10022897","title":"Combining Contrastive and Non-Contrastive Losses for Fine-Tuning Pretrained Models in Speech Analysis","display_name":"Combining Contrastive and Non-Contrastive Losses for Fine-Tuning Pretrained Models in Speech Analysis","publication_year":2023,"publication_date":"2023-01-09","ids":{"openalex":"https://openalex.org/W4319862658","doi":"https://doi.org/10.1109/slt54892.2023.10022897"},"language":"en","primary_location":{"id":"doi:10.1109/slt54892.2023.10022897","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/slt54892.2023.10022897","pdf_url":null,"source":{"id":"https://openalex.org/S4363605953","display_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078880407","display_name":"Florian Lux","orcid":"https://orcid.org/0000-0003-4325-5129"},"institutions":[{"id":"https://openalex.org/I100066346","display_name":"University of Stuttgart","ror":"https://ror.org/04vnq7t77","country_code":"DE","type":"education","lineage":["https://openalex.org/I100066346"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Florian Lux","raw_affiliation_strings":["University of Stuttgart, Institute for Natural Language Processing,Germany","University of Stuttgart, Institute for Natural Language Processing, Germany"],"affiliations":[{"raw_affiliation_string":"University of Stuttgart, Institute for Natural Language Processing,Germany","institution_ids":["https://openalex.org/I100066346"]},{"raw_affiliation_string":"University of Stuttgart, Institute for Natural Language Processing, Germany","institution_ids":["https://openalex.org/I100066346"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100757473","display_name":"Ching\u2010Yi Chen","orcid":"https://orcid.org/0000-0002-5776-084X"},"institutions":[{"id":"https://openalex.org/I100066346","display_name":"University of Stuttgart","ror":"https://ror.org/04vnq7t77","country_code":"DE","type":"education","lineage":["https://openalex.org/I100066346"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Ching-Yi Chen","raw_affiliation_strings":["University of Stuttgart, Institute for Natural Language Processing,Germany","University of Stuttgart, Institute for Natural Language Processing, Germany"],"affiliations":[{"raw_affiliation_string":"University of Stuttgart, Institute for Natural Language Processing,Germany","institution_ids":["https://openalex.org/I100066346"]},{"raw_affiliation_string":"University of Stuttgart, Institute for Natural Language Processing, Germany","institution_ids":["https://openalex.org/I100066346"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5020700841","display_name":"Ngoc Thang Vu","orcid":"https://orcid.org/0000-0001-7893-9147"},"institutions":[{"id":"https://openalex.org/I100066346","display_name":"University of Stuttgart","ror":"https://ror.org/04vnq7t77","country_code":"DE","type":"education","lineage":["https://openalex.org/I100066346"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Ngoc Thang Vu","raw_affiliation_strings":["University of Stuttgart, Institute for Natural Language Processing,Germany","University of Stuttgart, Institute for Natural Language Processing, Germany"],"affiliations":[{"raw_affiliation_string":"University of Stuttgart, Institute for Natural Language Processing,Germany","institution_ids":["https://openalex.org/I100066346"]},{"raw_affiliation_string":"University of Stuttgart, Institute for Natural Language Processing, Germany","institution_ids":["https://openalex.org/I100066346"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5078880407"],"corresponding_institution_ids":["https://openalex.org/I100066346"],"apc_list":null,"apc_paid":null,"fwci":0.1129,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.20822488,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"4","issue":null,"first_page":"876","last_page":"883"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8700246810913086},{"id":"https://openalex.org/keywords/paralanguage","display_name":"Paralanguage","score":0.7927855849266052},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.6949498057365417},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6679068803787231},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.558563768863678},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5567036867141724},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5119088888168335},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.48277178406715393},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.46142369508743286},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4395258128643036},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.43641936779022217}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8700246810913086},{"id":"https://openalex.org/C133378560","wikidata":"https://www.wikidata.org/wiki/Q1753225","display_name":"Paralanguage","level":2,"score":0.7927855849266052},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.6949498057365417},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6679068803787231},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.558563768863678},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5567036867141724},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5119088888168335},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.48277178406715393},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.46142369508743286},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4395258128643036},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.43641936779022217},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/slt54892.2023.10022897","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/slt54892.2023.10022897","pdf_url":null,"source":{"id":"https://openalex.org/S4363605953","display_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE Spoken Language Technology Workshop (SLT)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.75}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1962925405","https://openalex.org/W1983645263","https://openalex.org/W2146334809","https://openalex.org/W2171590421","https://openalex.org/W2187089797","https://openalex.org/W2530305026","https://openalex.org/W2726515241","https://openalex.org/W2754547322","https://openalex.org/W2807627734","https://openalex.org/W2842511635","https://openalex.org/W2899436735","https://openalex.org/W2962788625","https://openalex.org/W2963775347","https://openalex.org/W2964105864","https://openalex.org/W2972362771","https://openalex.org/W2982223350","https://openalex.org/W3001160332","https://openalex.org/W3015265920","https://openalex.org/W3024869864","https://openalex.org/W3041561163","https://openalex.org/W3089942512","https://openalex.org/W3112034174","https://openalex.org/W3113594615","https://openalex.org/W3119689200","https://openalex.org/W3127686677","https://openalex.org/W3181310845","https://openalex.org/W3197580070","https://openalex.org/W3197642003","https://openalex.org/W3198275944","https://openalex.org/W3209059054","https://openalex.org/W4210694145","https://openalex.org/W4297808394","https://openalex.org/W6735236233","https://openalex.org/W6752888775","https://openalex.org/W6753417141","https://openalex.org/W6771467084","https://openalex.org/W6780218876","https://openalex.org/W6786669483","https://openalex.org/W6787141514","https://openalex.org/W6791742336"],"related_works":["https://openalex.org/W2064370490","https://openalex.org/W3166813893","https://openalex.org/W2910013580","https://openalex.org/W2391900574","https://openalex.org/W3200958703","https://openalex.org/W1990078780","https://openalex.org/W2778981579","https://openalex.org/W2376619307","https://openalex.org/W4250647969","https://openalex.org/W3108667266"],"abstract_inverted_index":{"Embedding":[0],"paralinguistic":[1],"properties":[2,55],"is":[3,29,47],"a":[4,11,32,51,71,97,109,139],"challenging":[5],"task":[6],"as":[7,21],"there":[8],"are":[9,131],"only":[10],"few":[12],"hours":[13],"of":[14,41,111],"training":[15],"data":[16],"available":[17],"for":[18,119],"domains":[19],"such":[20],"emotional":[22],"speech.":[23,43],"One":[24],"solution":[25],"to":[26,30,50,75,88,96,102,116],"this":[27,67],"problem":[28],"pretrain":[31],"general":[33],"self-supervised":[34],"speech":[35],"representation":[36],"model":[37,46],"on":[38,134,141],"large":[39],"amounts":[40],"unlabeled":[42],"This":[44],"pretrained":[45],"then":[48,83],"finetuned":[49,132],"specific":[52],"task.":[53,99],"Paralinguistic":[54],"however":[56],"have":[57],"notoriously":[58],"high":[59],"class":[60,105,120],"variance,":[61],"making":[62],"the":[63,80,90,93,104],"finetuning":[64],"ineffective.":[65],"In":[66,100],"work,":[68],"we":[69,78,84,107],"propose":[70],"two":[72],"step":[73],"approach":[74,126],"this.":[76],"First":[77],"improve":[79,103],"embedding":[81,94],"space,":[82],"train":[85],"an":[86],"adapter":[87],"bridge":[89],"gap":[91],"from":[92],"space":[95],"classification":[98],"order":[101],"invariance":[106],"use":[108],"combination":[110],"contrastive":[112],"and":[113,137],"non-contrastive":[114],"losses":[115],"explicitly":[117],"optimize":[118],"invariant,":[121],"yet":[122],"discriminative":[123],"features.":[124],"Our":[125],"consistently":[127],"outperforms":[128],"baselines":[129],"that":[130],"end-to-end":[133],"multiple":[135],"tasks":[136],"surpasses":[138],"benchmark":[140],"state-of-the-art":[142],"emotion":[143],"classification.":[144]},"counts_by_year":[{"year":2023,"cited_by_count":1}],"updated_date":"2025-12-21T01:58:51.020947","created_date":"2025-10-10T00:00:00"}
