{"id":"https://openalex.org/W4405112824","doi":"https://doi.org/10.1016/j.procs.2024.11.082","title":"Multimodal Sentiment Analysis based on Video and Audio Inputs","display_name":"Multimodal Sentiment Analysis based on Video and Audio Inputs","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4405112824","doi":"https://doi.org/10.1016/j.procs.2024.11.082"},"language":"en","primary_location":{"id":"doi:10.1016/j.procs.2024.11.082","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.procs.2024.11.082","pdf_url":null,"source":{"id":"https://openalex.org/S120348307","display_name":"Procedia Computer Science","issn_l":"1877-0509","issn":["1877-0509"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Procedia Computer Science","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1016/j.procs.2024.11.082","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030382432","display_name":"Antonio Fern\u00e1ndez","orcid":"https://orcid.org/0000-0002-4320-2409"},"institutions":[{"id":"https://openalex.org/I125068653","display_name":"IE University","ror":"https://ror.org/02jjdwm75","country_code":"ES","type":"education","lineage":["https://openalex.org/I125068653"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Antonio Fern\u00e1ndez","raw_affiliation_strings":["School of science and technology, IE University, Madrid, Spain","Cyphy Life, Robotics & AI Lab, School of science and technology, IE University, Madrid, Spain"],"affiliations":[{"raw_affiliation_string":"School of science and technology, IE University, Madrid, Spain","institution_ids":["https://openalex.org/I125068653"]},{"raw_affiliation_string":"Cyphy Life, Robotics & AI Lab, School of science and technology, IE University, Madrid, Spain","institution_ids":["https://openalex.org/I125068653"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049687019","display_name":"Suzan Awinat","orcid":null},"institutions":[{"id":"https://openalex.org/I125068653","display_name":"IE University","ror":"https://ror.org/02jjdwm75","country_code":"ES","type":"education","lineage":["https://openalex.org/I125068653"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Suzan Awinat","raw_affiliation_strings":["School of science and technology, IE University, Madrid, Spain","Cyphy Life, Robotics & AI Lab, School of science and technology, IE University, Madrid, Spain"],"affiliations":[{"raw_affiliation_string":"School of science and technology, IE University, Madrid, Spain","institution_ids":["https://openalex.org/I125068653"]},{"raw_affiliation_string":"Cyphy Life, Robotics & AI Lab, School of science and technology, IE University, Madrid, Spain","institution_ids":["https://openalex.org/I125068653"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5030382432"],"corresponding_institution_ids":["https://openalex.org/I125068653"],"apc_list":null,"apc_paid":null,"fwci":0.6748,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.70238915,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"251","issue":null,"first_page":"41","last_page":"48"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9914000034332275,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9418431520462036},{"id":"https://openalex.org/keywords/sentiment-analysis","display_name":"Sentiment analysis","score":0.567317545413971},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.48235148191452026},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.45870575308799744},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.34698718786239624},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3337233066558838}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9418431520462036},{"id":"https://openalex.org/C66402592","wikidata":"https://www.wikidata.org/wiki/Q2271421","display_name":"Sentiment analysis","level":2,"score":0.567317545413971},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.48235148191452026},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.45870575308799744},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34698718786239624},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3337233066558838}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1016/j.procs.2024.11.082","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.procs.2024.11.082","pdf_url":null,"source":{"id":"https://openalex.org/S120348307","display_name":"Procedia Computer Science","issn_l":"1877-0509","issn":["1877-0509"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Procedia Computer Science","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2412.09317","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2412.09317","pdf_url":"https://arxiv.org/pdf/2412.09317","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"doi:10.1016/j.procs.2024.11.082","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.procs.2024.11.082","pdf_url":null,"source":{"id":"https://openalex.org/S120348307","display_name":"Procedia Computer Science","issn_l":"1877-0509","issn":["1877-0509"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Procedia Computer Science","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W2030931454","https://openalex.org/W2101234009","https://openalex.org/W2191779130","https://openalex.org/W2964194271","https://openalex.org/W3036601975","https://openalex.org/W3046174388","https://openalex.org/W3081192838","https://openalex.org/W3171536535","https://openalex.org/W3197876970","https://openalex.org/W3208743843","https://openalex.org/W4205185581","https://openalex.org/W4214540501","https://openalex.org/W4295312788","https://openalex.org/W4311461310","https://openalex.org/W4320477735","https://openalex.org/W4391021755","https://openalex.org/W6675354045","https://openalex.org/W6713134421","https://openalex.org/W6766978945","https://openalex.org/W6769243733","https://openalex.org/W6780218876","https://openalex.org/W6793119350","https://openalex.org/W6800989902","https://openalex.org/W6802745079","https://openalex.org/W6849762702"],"related_works":["https://openalex.org/W2548633793","https://openalex.org/W3013279174","https://openalex.org/W2941935829","https://openalex.org/W2596247554","https://openalex.org/W3132372214","https://openalex.org/W4224284088","https://openalex.org/W4286571989","https://openalex.org/W2765903680","https://openalex.org/W4317653575","https://openalex.org/W2801635251"],"abstract_inverted_index":{"Despite":[0],"the":[1,8,16,21,44,61,64,70,86,95,102,109,116,121,136,140,145,153],"abundance":[2],"of":[3,38,46,94,120],"current":[4],"researches":[5],"working":[6],"on":[7,149],"sentiment":[9],"analysis":[10],"from":[11],"videos":[12],"and":[13,53,69,85,152],"audios,":[14],"finding":[15],"best":[17],"model":[18],"that":[19,50,78,163],"gives":[20,160],"highest":[22],"accuracy":[23],"rate":[24],"is":[25,41,106,130],"still":[26],"considered":[27],"a":[28],"challenge":[29],"for":[30,67,73,83,90,97],"researchers":[31],"in":[32,108,115],"this":[33,39],"field.":[34],"The":[35,56,75,92,132],"main":[36],"objective":[37],"paper":[40],"to":[42,59],"prove":[43],"usability":[45],"emotion":[47,99],"recognition":[48],"models":[49,62,77,105,122],"take":[51],"video":[52],"audio":[54,68,84],"inputs.":[55],"datasets":[57],"used":[58,80,134],"train":[60],"are":[63,135],"CREMA-D":[65],"dataset":[66,72],"RAVDESS":[71],"video.":[74,91],"fine-tuned":[76],"been":[79],"are:":[81],"Facebook/wav2vec2-large":[82],"Google/vivit-b-16":[87],"\u00d7":[88],"2-kinetics400":[89],"avarage":[93],"probabilities":[96],"each":[98],"generated":[100],"by":[101],"two":[103],"previous":[104],"utilized":[107],"decision":[110],"making":[111],"framework.":[112],"After":[113],"disparity":[114],"results,":[117],"if":[118],"one":[119],"gets":[123],"much":[124],"higher":[125],"accuracy,":[126],"another":[127],"test":[128],"framework":[129],"created.":[131],"methods":[133,169],"Weighted":[137],"Average":[138],"method,":[139,144,151],"Confidence":[141,150],"Level":[142],"Threshold":[143],"Dynamic":[146],"Weighting":[147],"Based":[148],"Rule-Based":[154],"Logic":[155],"method.":[156],"This":[157],"limited":[158],"approach":[159],"encouraging":[161],"results":[162],"make":[164],"future":[165],"research":[166],"into":[167],"these":[168],"viable.":[170]},"counts_by_year":[{"year":2026,"cited_by_count":2}],"updated_date":"2026-04-18T07:56:08.524223","created_date":"2025-10-10T00:00:00"}
