{"id":"https://openalex.org/W4391149307","doi":"https://doi.org/10.1109/ictc58733.2023.10392928","title":"Comparative analysis of multi-loss functions for enhanced multi-modal speech emotion recognition","display_name":"Comparative analysis of multi-loss functions for enhanced multi-modal speech emotion recognition","publication_year":2023,"publication_date":"2023-10-11","ids":{"openalex":"https://openalex.org/W4391149307","doi":"https://doi.org/10.1109/ictc58733.2023.10392928"},"language":"en","primary_location":{"id":"doi:10.1109/ictc58733.2023.10392928","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ictc58733.2023.10392928","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 14th International Conference on Information and Communication Technology Convergence (ICTC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5045633892","display_name":"Phuong-Nam Tran","orcid":"https://orcid.org/0009-0009-6551-9106"},"institutions":[{"id":"https://openalex.org/I109689652","display_name":"FPT University","ror":"https://ror.org/03esj4g97","country_code":"VN","type":"education","lineage":["https://openalex.org/I109689652"]}],"countries":["VN"],"is_corresponding":true,"raw_author_name":"Phuong-Nam Tran","raw_affiliation_strings":["FPT University,Dept. of Computing Fundamental,Ho Chi Minh City,Vietnam","Dept. of Computing Fundamental, FPT University, Ho Chi Minh City, Vietnam"],"affiliations":[{"raw_affiliation_string":"FPT University,Dept. of Computing Fundamental,Ho Chi Minh City,Vietnam","institution_ids":["https://openalex.org/I109689652"]},{"raw_affiliation_string":"Dept. of Computing Fundamental, FPT University, Ho Chi Minh City, Vietnam","institution_ids":["https://openalex.org/I109689652"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091615160","display_name":"Thuy-Duong Thi Vu","orcid":"https://orcid.org/0000-0001-8614-8732"},"institutions":[{"id":"https://openalex.org/I109689652","display_name":"FPT University","ror":"https://ror.org/03esj4g97","country_code":"VN","type":"education","lineage":["https://openalex.org/I109689652"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Thuy-Duong Thi Vu","raw_affiliation_strings":["FPT University,Dept. of Computing Fundamental,Ho Chi Minh City,Vietnam","Dept. of Computing Fundamental, FPT University, Ho Chi Minh City, Vietnam"],"affiliations":[{"raw_affiliation_string":"FPT University,Dept. of Computing Fundamental,Ho Chi Minh City,Vietnam","institution_ids":["https://openalex.org/I109689652"]},{"raw_affiliation_string":"Dept. of Computing Fundamental, FPT University, Ho Chi Minh City, Vietnam","institution_ids":["https://openalex.org/I109689652"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091402542","display_name":"Nhat Truong Pham","orcid":"https://orcid.org/0000-0002-8086-6722"},"institutions":[{"id":"https://openalex.org/I848706","display_name":"Sungkyunkwan University","ror":"https://ror.org/04q78tk20","country_code":"KR","type":"education","lineage":["https://openalex.org/I848706"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Nhat Truong Pham","raw_affiliation_strings":["Sungkyunkwan University,Dept. of Integrative Biotechnology,Suwon,Republic of Korea","Dept. of Integrative Biotechnology, Sungkyunkwan University, Suwon, Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Sungkyunkwan University,Dept. of Integrative Biotechnology,Suwon,Republic of Korea","institution_ids":["https://openalex.org/I848706"]},{"raw_affiliation_string":"Dept. of Integrative Biotechnology, Sungkyunkwan University, Suwon, Republic of Korea","institution_ids":["https://openalex.org/I848706"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036183089","display_name":"Hanh Dang-Ngoc","orcid":"https://orcid.org/0000-0002-5155-5730"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Hanh Dang-Ngoc","raw_affiliation_strings":["University of Technology Sydney,School of Electrical and Data Engineering,Sydney, NSW,Australia","School of Electrical and Data Engineering, University of Technology Sydney, Sydney, NSW, Australia"],"affiliations":[{"raw_affiliation_string":"University of Technology Sydney,School of Electrical and Data Engineering,Sydney, NSW,Australia","institution_ids":["https://openalex.org/I114017466"]},{"raw_affiliation_string":"School of Electrical and Data Engineering, University of Technology Sydney, Sydney, NSW, Australia","institution_ids":["https://openalex.org/I114017466"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034169259","display_name":"Duc Ngoc Minh Dang","orcid":"https://orcid.org/0000-0001-9302-3129"},"institutions":[{"id":"https://openalex.org/I109689652","display_name":"FPT University","ror":"https://ror.org/03esj4g97","country_code":"VN","type":"education","lineage":["https://openalex.org/I109689652"]}],"countries":["VN"],"is_corresponding":false,"raw_author_name":"Duc Ngoc Minh Dang","raw_affiliation_strings":["FPT University,Dept. of Computing Fundamental,Ho Chi Minh City,Vietnam","Dept. of Computing Fundamental, FPT University, Ho Chi Minh City, Vietnam"],"affiliations":[{"raw_affiliation_string":"FPT University,Dept. of Computing Fundamental,Ho Chi Minh City,Vietnam","institution_ids":["https://openalex.org/I109689652"]},{"raw_affiliation_string":"Dept. of Computing Fundamental, FPT University, Ho Chi Minh City, Vietnam","institution_ids":["https://openalex.org/I109689652"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5045633892"],"corresponding_institution_ids":["https://openalex.org/I109689652"],"apc_list":null,"apc_paid":null,"fwci":0.5224,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.70437996,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"425","last_page":"429"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7314068078994751},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.7090215086936951},{"id":"https://openalex.org/keywords/cross-entropy","display_name":"Cross entropy","score":0.6036030650138855},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.589900016784668},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.44801610708236694},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.4437209367752075},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.44161587953567505},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4378807842731476},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.4264613389968872},{"id":"https://openalex.org/keywords/information-loss","display_name":"Information loss","score":0.4227864444255829},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.38516664505004883},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3507859706878662}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7314068078994751},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.7090215086936951},{"id":"https://openalex.org/C167981619","wikidata":"https://www.wikidata.org/wiki/Q1685498","display_name":"Cross entropy","level":3,"score":0.6036030650138855},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.589900016784668},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.44801610708236694},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.4437209367752075},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.44161587953567505},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4378807842731476},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.4264613389968872},{"id":"https://openalex.org/C2988416141","wikidata":"https://www.wikidata.org/wiki/Q6031139","display_name":"Information loss","level":2,"score":0.4227864444255829},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.38516664505004883},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3507859706878662},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ictc58733.2023.10392928","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ictc58733.2023.10392928","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 14th International Conference on Information and Communication Technology Convergence (ICTC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.6399999856948853,"display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W2146334809","https://openalex.org/W2520774990","https://openalex.org/W2526050071","https://openalex.org/W2912083425","https://openalex.org/W2963430102","https://openalex.org/W3127976608","https://openalex.org/W3201040525","https://openalex.org/W4384209462","https://openalex.org/W4388106332","https://openalex.org/W6739901393"],"related_works":["https://openalex.org/W2951959408","https://openalex.org/W2895831313","https://openalex.org/W4289406078","https://openalex.org/W4386453465","https://openalex.org/W3194440789","https://openalex.org/W4390839675","https://openalex.org/W3163951405","https://openalex.org/W4287659163","https://openalex.org/W4226279124","https://openalex.org/W4288264391"],"abstract_inverted_index":{"In":[0,85],"recent":[1],"years,":[2],"multi-modal":[3,82,98,112,151],"analysis":[4],"has":[5,47,63],"gained":[6],"significant":[7],"prominence":[8],"across":[9],"domains":[10],"such":[11,118],"as":[12,119],"audio/speech":[13],"processing,":[14,17],"natural":[15],"language":[16],"and":[18,40,126,143,161],"affective":[19],"computing,":[20],"with":[21,44,114],"a":[22],"particular":[23],"focus":[24,89],"on":[25,90,97,130,168],"speech":[26],"emotion":[27],"recognition":[28],"(SER).":[29],"The":[30],"integration":[31],"of":[32,52,93,111,140,159,166],"data":[33],"from":[34],"diverse":[35],"sources,":[36],"encompassing":[37],"text,":[38],"audio,":[39],"images,":[41],"in":[42,55],"conjunction":[43],"classifier":[45],"algorithms":[46],"led":[48],"to":[49,74],"the":[50,59,67,76,91,94,103,109,138,147,156,162,169],"realization":[51],"enhanced":[53],"performance":[54,110,149],"SER":[56,99,113],"tasks.":[57,84],"Traditionally,":[58],"cross-entropy":[60,120,141],"loss":[61,95,116,142,145],"function":[62],"been":[64],"employed":[65],"for":[66,81,150],"classification":[68,83],"problem.":[69],"However,":[70],"it":[71,134],"is":[72,135],"challenging":[73],"discriminate":[75],"feature":[77],"representations":[78],"among":[79],"classes":[80],"this":[86],"study,":[87],"we":[88,107],"impact":[92],"functions":[96],"rather":[100],"than":[101],"designing":[102],"model":[104],"architecture.":[105],"Mainly,":[106],"evaluate":[108],"different":[115],"functions,":[117],"loss,":[121,123,125],"center":[122],"contrastive-center":[124,144],"their":[127],"combinations.":[128],"Based":[129],"extensive":[131],"comparative":[132],"analysis,":[133],"proven":[136],"that":[137],"combination":[139,154],"achieves":[146],"best":[148],"SER.":[152],"This":[153],"reaches":[155],"highest":[157,163],"accuracy":[158,165],"80.27%":[160],"balanced":[164],"81.44%":[167],"IEMOCAP":[170],"dataset.":[171]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
