{"id":"https://openalex.org/W3204853141","doi":"https://doi.org/10.1109/ispa52656.2021.9552068","title":"Speech Emotion Recognition using GhostVLAD and Sentiment Metric Learning","display_name":"Speech Emotion Recognition using GhostVLAD and Sentiment Metric Learning","publication_year":2021,"publication_date":"2021-09-13","ids":{"openalex":"https://openalex.org/W3204853141","doi":"https://doi.org/10.1109/ispa52656.2021.9552068","mag":"3204853141"},"language":"en","primary_location":{"id":"doi:10.1109/ispa52656.2021.9552068","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ispa52656.2021.9552068","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 12th International Symposium on Image and Signal Processing and Analysis (ISPA)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5073509926","display_name":"Bogdan Mocanu","orcid":"https://orcid.org/0000-0002-2751-0954"},"institutions":[{"id":"https://openalex.org/I205703379","display_name":"Institut Mines-T\u00e9l\u00e9com","ror":"https://ror.org/025vp2923","country_code":"FR","type":"facility","lineage":["https://openalex.org/I205703379"]},{"id":"https://openalex.org/I4210145102","display_name":"Institut Polytechnique de Paris","ror":"https://ror.org/042tfbd02","country_code":"FR","type":"education","lineage":["https://openalex.org/I4210145102"]},{"id":"https://openalex.org/I61641377","display_name":"Universitatea Na\u021bional\u0103 de \u0218tiin\u021b\u0103 \u0219i Tehnologie Politehnica Bucure\u0219ti","ror":"https://ror.org/0558j5q12","country_code":"RO","type":"education","lineage":["https://openalex.org/I61641377"]}],"countries":["FR","RO"],"is_corresponding":true,"raw_author_name":"Bogdan Mocanu","raw_affiliation_strings":["Institut Polytechnique de Paris, T\u00e9l\u00e9com SudParis, France","Universit\u00e9 Politehnica [Bucarest, Roumanie] (Splaiul Independen\u0163ei nr. 313, Sector 6, Bucharest, Romania - Romania)"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institut Polytechnique de Paris, T\u00e9l\u00e9com SudParis, France","institution_ids":["https://openalex.org/I4210145102","https://openalex.org/I205703379"]},{"raw_affiliation_string":"Universit\u00e9 Politehnica [Bucarest, Roumanie] (Splaiul Independen\u0163ei nr. 313, Sector 6, Bucharest, Romania - Romania)","institution_ids":["https://openalex.org/I61641377"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5025204322","display_name":"Ruxandra \u021aapu","orcid":"https://orcid.org/0000-0003-3170-4150"},"institutions":[{"id":"https://openalex.org/I205703379","display_name":"Institut Mines-T\u00e9l\u00e9com","ror":"https://ror.org/025vp2923","country_code":"FR","type":"facility","lineage":["https://openalex.org/I205703379"]},{"id":"https://openalex.org/I4210145102","display_name":"Institut Polytechnique de Paris","ror":"https://ror.org/042tfbd02","country_code":"FR","type":"education","lineage":["https://openalex.org/I4210145102"]},{"id":"https://openalex.org/I4387153010","display_name":"T\u00e9l\u00e9com SudParis","ror":"https://ror.org/05xvk4r52","country_code":"FR","type":"education","lineage":["https://openalex.org/I205703379","https://openalex.org/I4210145102","https://openalex.org/I4387153010"]},{"id":"https://openalex.org/I61641377","display_name":"Universitatea Na\u021bional\u0103 de \u0218tiin\u021b\u0103 \u0219i Tehnologie Politehnica Bucure\u0219ti","ror":"https://ror.org/0558j5q12","country_code":"RO","type":"education","lineage":["https://openalex.org/I61641377"]}],"countries":["FR","RO"],"is_corresponding":false,"raw_author_name":"Ruxandra Tapu","raw_affiliation_strings":["Faculty of ETTI, University \u201cPolitehnica\u201d of Bucharest, Romania","Institut Polytechnique de Paris, T\u00e9l\u00e9com SudParis, France","IP Paris - Institut Polytechnique de Paris (France)","TSP - ARTEMIS - D\u00e9partement Advanced Research And Techniques For Multidimensional Imaging Systems (T\u00e9l\u00e9com SudParis - 9 rue Charles Fourier. 91011 \u00c9vry Cedex - France)","ARMEDIA-SAMOVAR - ARMEDIA (TELECOM Sudparis\r\n 9 rue Charles Fourier\r\n 91011 EVRY - France)"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of ETTI, University \u201cPolitehnica\u201d of Bucharest, Romania","institution_ids":["https://openalex.org/I61641377"]},{"raw_affiliation_string":"Institut Polytechnique de Paris, T\u00e9l\u00e9com SudParis, France","institution_ids":["https://openalex.org/I4210145102","https://openalex.org/I205703379"]},{"raw_affiliation_string":"IP Paris - Institut Polytechnique de Paris (France)","institution_ids":["https://openalex.org/I4210145102"]},{"raw_affiliation_string":"TSP - ARTEMIS - D\u00e9partement Advanced Research And Techniques For Multidimensional Imaging Systems (T\u00e9l\u00e9com SudParis - 9 rue Charles Fourier. 91011 \u00c9vry Cedex - France)","institution_ids":["https://openalex.org/I4210145102","https://openalex.org/I4387153010"]},{"raw_affiliation_string":"ARMEDIA-SAMOVAR - ARMEDIA (TELECOM Sudparis\r\n 9 rue Charles Fourier\r\n 91011 EVRY - France)","institution_ids":["https://openalex.org/I4387153010"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5073509926"],"corresponding_institution_ids":["https://openalex.org/I205703379","https://openalex.org/I4210145102","https://openalex.org/I61641377"],"apc_list":null,"apc_paid":null,"fwci":0.9102,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.7625,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.801663339138031},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7962459325790405},{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.691615104675293},{"id":"https://openalex.org/keywords/emotion-recognition","display_name":"Emotion recognition","score":0.6883015036582947},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.6420612335205078},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.6181557178497314},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.5888978838920593},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5768190026283264},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.5560210347175598},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.5386415123939514},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5332446098327637},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5326093435287476},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5015890598297119},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.4476739168167114},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.423446387052536},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.41279274225234985},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.32685720920562744}],"concepts":[{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.801663339138031},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7962459325790405},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.691615104675293},{"id":"https://openalex.org/C2777438025","wikidata":"https://www.wikidata.org/wiki/Q1339090","display_name":"Emotion recognition","level":2,"score":0.6883015036582947},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.6420612335205078},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.6181557178497314},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.5888978838920593},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5768190026283264},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.5560210347175598},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.5386415123939514},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5332446098327637},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5326093435287476},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5015890598297119},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.4476739168167114},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.423446387052536},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.41279274225234985},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32685720920562744},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/ispa52656.2021.9552068","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ispa52656.2021.9552068","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 12th International Symposium on Image and Signal Processing and Analysis (ISPA)","raw_type":"proceedings-article"},{"id":"pmh:oai:HAL:hal-03590987v1","is_oa":false,"landing_page_url":"https://hal.science/hal-03590987","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ISPA 2021: 12th international symposium on Image and Signal Processing and Analysis, Sep 2021, Zagreb (online), Croatia. pp.126-130, &#x27E8;10.1109/ISPA52656.2021.9552068&#x27E9;","raw_type":"Conference papers"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.7400000095367432}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W2003238582","https://openalex.org/W2022068631","https://openalex.org/W2030931454","https://openalex.org/W2087618018","https://openalex.org/W2118526556","https://openalex.org/W2155986100","https://openalex.org/W2170876097","https://openalex.org/W2189018360","https://openalex.org/W2194775991","https://openalex.org/W2395587375","https://openalex.org/W2620629206","https://openalex.org/W2625297138","https://openalex.org/W2746241180","https://openalex.org/W2746978487","https://openalex.org/W2786779322","https://openalex.org/W2803193013","https://openalex.org/W2808631503","https://openalex.org/W2889065492","https://openalex.org/W2896584247","https://openalex.org/W2919115771","https://openalex.org/W2951019013","https://openalex.org/W2963507712","https://openalex.org/W2964370293","https://openalex.org/W3008039831","https://openalex.org/W3097255602","https://openalex.org/W3106506544","https://openalex.org/W3161565210"],"related_works":["https://openalex.org/W2184242386","https://openalex.org/W2325729322","https://openalex.org/W1923358586","https://openalex.org/W3208297503","https://openalex.org/W3119773509","https://openalex.org/W2889153461","https://openalex.org/W2964117661","https://openalex.org/W2735297260","https://openalex.org/W4388405611","https://openalex.org/W2619127353"],"abstract_inverted_index":{"In":[0,54],"this":[1],"paper,":[2],"we":[3,56],"introduce":[4,57],"a":[5,17,24,58],"novel":[6],"deep":[7],"learning-based":[8],"speech":[9],"emotion":[10,88],"recognition":[11,89],"method.":[12],"The":[13,29,72,97],"proposed":[14,84,103],"approach":[15,104],"exploits":[16],"convolutional":[18],"neural":[19],"network":[20],"(CNN),":[21],"enriched":[22],"with":[23,111],"GhostVLAD":[25],"feature":[26],"aggregation":[27],"layer.":[28],"resulting":[30],"representation":[31,44],"adjusts":[32],"the":[33,40,65,68,83,102,108],"contribution":[34],"of":[35,107,115],"each":[36],"spectrogram":[37],"segments":[38],"to":[39,92],"final":[41],"class":[42],"prototype":[43],"and":[45,50,79,94],"is":[46],"used":[47],"for":[48],"trainable":[49],"discriminative":[51],"clustering":[52],"purposes.":[53],"addition,":[55],"modified":[59],"triplet":[60],"loss":[61],"function":[62],"which":[63,86],"integrates":[64],"relations":[66],"between":[67],"various":[69],"emotional":[70],"patterns.":[71],"experimental":[73],"evaluation,":[74],"carried":[75],"out":[76],"on":[77],"RAVDESS":[78],"CREMA-D":[80],"datasets":[81],"validates":[82],"methodology,":[85],"yields":[87],"rates":[90],"superior":[91],"83%":[93],"64%,":[95],"respectively.":[96],"comparative":[98],"evaluation":[99],"shows":[100],"that":[101],"outperforms":[105],"state":[106],"art":[109],"techniques,":[110],"gains":[112],"in":[113],"accuracy":[114],"more":[116],"than":[117],"3%.":[118]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1}],"updated_date":"2026-06-05T09:01:59.212387","created_date":"2025-10-10T00:00:00"}
