{"id":"https://openalex.org/W4392904295","doi":"https://doi.org/10.1109/icassp48485.2024.10448213","title":"Prompting Audios Using Acoustic Properties for Emotion Representation","display_name":"Prompting Audios Using Acoustic Properties for Emotion Representation","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392904295","doi":"https://doi.org/10.1109/icassp48485.2024.10448213"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10448213","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10448213","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074132219","display_name":"Hira Dhamyal","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Hira Dhamyal","raw_affiliation_strings":["Carnegie Mellon University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073250019","display_name":"Benjamin Elizalde","orcid":"https://orcid.org/0000-0001-6461-5790"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Benjamin Elizalde","raw_affiliation_strings":["Microsoft"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft","institution_ids":["https://openalex.org/I4210164937"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017946811","display_name":"Soham Deshmukh","orcid":null},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Soham Deshmukh","raw_affiliation_strings":["Microsoft"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft","institution_ids":["https://openalex.org/I4210164937"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101528069","display_name":"Huaming Wang","orcid":"https://orcid.org/0000-0002-4434-7482"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Huaming Wang","raw_affiliation_strings":["Microsoft"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft","institution_ids":["https://openalex.org/I4210164937"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113017615","display_name":"Bhiksha Raj","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]},{"id":"https://openalex.org/I91044093","display_name":"Zayed University","ror":"https://ror.org/03snqfa66","country_code":"AE","type":"education","lineage":["https://openalex.org/I91044093"]}],"countries":["AE","US"],"is_corresponding":false,"raw_author_name":"Bhiksha Raj","raw_affiliation_strings":["Carnegie Mellon University","Mohammed bin Zayed University of AI"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Mohammed bin Zayed University of AI","institution_ids":["https://openalex.org/I91044093"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102775511","display_name":"Rita Singh","orcid":"https://orcid.org/0000-0003-3743-0162"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rita Singh","raw_affiliation_strings":["Carnegie Mellon University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University","institution_ids":["https://openalex.org/I74973139"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5074132219"],"corresponding_institution_ids":["https://openalex.org/I74973139"],"apc_list":null,"apc_paid":null,"fwci":0.9827,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.71970741,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"11936","last_page":"11940"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6978034377098083},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6419085264205933},{"id":"https://openalex.org/keywords/emotion-recognition","display_name":"Emotion recognition","score":0.6295719742774963},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5771397948265076},{"id":"https://openalex.org/keywords/articulation","display_name":"Articulation (sociology)","score":0.5217719674110413},{"id":"https://openalex.org/keywords/facial-expression","display_name":"Facial expression","score":0.4536834955215454},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4134836494922638},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3584921061992645}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6978034377098083},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6419085264205933},{"id":"https://openalex.org/C2777438025","wikidata":"https://www.wikidata.org/wiki/Q1339090","display_name":"Emotion recognition","level":2,"score":0.6295719742774963},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5771397948265076},{"id":"https://openalex.org/C2779337067","wikidata":"https://www.wikidata.org/wiki/Q4800961","display_name":"Articulation (sociology)","level":3,"score":0.5217719674110413},{"id":"https://openalex.org/C195704467","wikidata":"https://www.wikidata.org/wiki/Q327968","display_name":"Facial expression","level":2,"score":0.4536834955215454},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4134836494922638},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3584921061992645},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10448213","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10448213","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W236071763","https://openalex.org/W1483830027","https://openalex.org/W1518634923","https://openalex.org/W2024218186","https://openalex.org/W2030931454","https://openalex.org/W2146334809","https://openalex.org/W2167375515","https://openalex.org/W2191779130","https://openalex.org/W2407162330","https://openalex.org/W2883409523","https://openalex.org/W2940168379","https://openalex.org/W2963686995","https://openalex.org/W2979826702","https://openalex.org/W2982063868","https://openalex.org/W3015591594","https://openalex.org/W3081192838","https://openalex.org/W3094550259","https://openalex.org/W3097805260","https://openalex.org/W3162331882","https://openalex.org/W4205633160","https://openalex.org/W4205689591","https://openalex.org/W4255053369","https://openalex.org/W4297841634","https://openalex.org/W4304080215","https://openalex.org/W4309762704","https://openalex.org/W4372266552","https://openalex.org/W4385822467","https://openalex.org/W6609052613","https://openalex.org/W6769663309"],"related_works":["https://openalex.org/W2416809655","https://openalex.org/W2011075082","https://openalex.org/W4205477866","https://openalex.org/W2038626839","https://openalex.org/W2369832197","https://openalex.org/W3126677997","https://openalex.org/W1610857240","https://openalex.org/W2584926856","https://openalex.org/W2075935902","https://openalex.org/W2014713986"],"abstract_inverted_index":{"Emotions":[0],"lie":[1],"on":[2,110,145],"a":[3,11,56,94,140],"continuum,":[4],"but":[5],"current":[6],"models":[7],"treat":[8],"emotions":[9,31],"as":[10],"finite":[12],"valued":[13],"discrete":[14],"variable.":[15],"This":[16],"representation":[17],"does":[18],"not":[19],"capture":[20],"the":[21,24,34,47,122,127,146],"diversity":[22],"in":[23,130,132],"expression":[25],"of":[26,36,49],"emotion.":[27],"To":[28],"better":[29,59],"represent":[30],"we":[32,45,138],"propose":[33],"use":[35,69,93],"natural":[37],"language":[38],"descriptions":[39],"(or":[40],"prompts).":[41],"In":[42,136],"this":[43],"work,":[44],"address":[46],"challenge":[48],"automatically":[50,86],"generating":[51],"these":[52],"prompts":[53,88,124],"and":[54,65,82,114],"training":[55],"model":[57,109],"to":[58,75,85,98,101],"learn":[60],"emotion":[61,76],"representations":[62],"from":[63],"audio":[64],"prompt":[66],"pairs.":[67],"We":[68,92,106],"acoustic":[70,104,123],"properties":[71],"that":[72,121],"are":[73],"correlated":[74],"like":[77],"pitch,":[78],"intensity,":[79],"speech":[80,100],"rate,":[81],"articulation":[83],"rate":[84],"generate":[87],"i.e.":[89],"\u2018acoustic":[90],"prompts\u2019.":[91],"contrastive":[95],"learning":[96],"objective":[97],"map":[99],"their":[102],"respective":[103],"prompts.":[105],"evaluate":[107],"our":[108],"Emotion":[111,116],"Audio":[112],"Retrieval":[113],"Speech":[115],"Recognition.":[117],"Our":[118],"results":[119],"show":[120],"significantly":[125],"improve":[126],"model\u2019s":[128],"performance":[129],"EAR,":[131],"various":[133],"Precision@K":[134],"metrics.":[135],"SER,":[137],"observe":[139],"3.8%":[141],"relative":[142],"accuracy":[143],"improvement":[144],"Ravdess":[147],"dataset.":[148]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2}],"updated_date":"2026-04-28T14:05:53.105641","created_date":"2025-10-10T00:00:00"}
