{"id":"https://openalex.org/W2548844710","doi":"https://doi.org/10.1145/2993148.2997629","title":"Video emotion recognition in the wild based on fusion of multimodal features","display_name":"Video emotion recognition in the wild based on fusion of multimodal features","publication_year":2016,"publication_date":"2016-10-31","ids":{"openalex":"https://openalex.org/W2548844710","doi":"https://doi.org/10.1145/2993148.2997629","mag":"2548844710"},"language":"en","primary_location":{"id":"doi:10.1145/2993148.2997629","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2993148.2997629","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 18th ACM International Conference on Multimodal Interaction","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101776086","display_name":"Shizhe Chen","orcid":"https://orcid.org/0000-0002-7313-9703"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shizhe Chen","raw_affiliation_strings":["Renmin University of China, China"],"affiliations":[{"raw_affiliation_string":"Renmin University of China, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100601315","display_name":"Xinrui Li","orcid":"https://orcid.org/0000-0001-6677-5683"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinrui Li","raw_affiliation_strings":["Renmin University of China, China"],"affiliations":[{"raw_affiliation_string":"Renmin University of China, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009985839","display_name":"Qin Jin","orcid":"https://orcid.org/0000-0001-6486-6020"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qin Jin","raw_affiliation_strings":["Renmin University of China, China"],"affiliations":[{"raw_affiliation_string":"Renmin University of China, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101618830","display_name":"Shilei Zhang","orcid":"https://orcid.org/0009-0007-5182-3065"},"institutions":[{"id":"https://openalex.org/I4210126794","display_name":"IBM Research (China)","ror":"https://ror.org/02yg1pf55","country_code":"CN","type":"company","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210126794"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shilei Zhang","raw_affiliation_strings":["IBM Research, China"],"affiliations":[{"raw_affiliation_string":"IBM Research, China","institution_ids":["https://openalex.org/I4210126794"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088716214","display_name":"Yong Qin","orcid":"https://orcid.org/0000-0002-6519-8316"},"institutions":[{"id":"https://openalex.org/I4210126794","display_name":"IBM Research (China)","ror":"https://ror.org/02yg1pf55","country_code":"CN","type":"company","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115","https://openalex.org/I4210126794"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yong Qin","raw_affiliation_strings":["IBM Research, China"],"affiliations":[{"raw_affiliation_string":"IBM Research, China","institution_ids":["https://openalex.org/I4210126794"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101776086"],"corresponding_institution_ids":["https://openalex.org/I78988378"],"apc_list":null,"apc_paid":null,"fwci":2.117,"has_fulltext":false,"cited_by_count":19,"citation_normalized_percentile":{"value":0.87675955,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"494","last_page":"500"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.798630952835083},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7419525384902954},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.7222636342048645},{"id":"https://openalex.org/keywords/mel-frequency-cepstrum","display_name":"Mel-frequency cepstrum","score":0.6751919984817505},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5855289101600647},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.5834430456161499},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5572580695152283},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5223989486694336},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.47741615772247314},{"id":"https://openalex.org/keywords/feature-vector","display_name":"Feature vector","score":0.4675917327404022},{"id":"https://openalex.org/keywords/random-forest","display_name":"Random forest","score":0.46052441000938416},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.4218937158584595},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3917485773563385}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.798630952835083},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7419525384902954},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.7222636342048645},{"id":"https://openalex.org/C151989614","wikidata":"https://www.wikidata.org/wiki/Q440370","display_name":"Mel-frequency cepstrum","level":3,"score":0.6751919984817505},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5855289101600647},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.5834430456161499},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5572580695152283},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5223989486694336},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.47741615772247314},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.4675917327404022},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.46052441000938416},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.4218937158584595},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3917485773563385},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2993148.2997629","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2993148.2997629","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 18th ACM International Conference on Multimodal Interaction","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.7099999785423279,"id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W273955616","https://openalex.org/W825525975","https://openalex.org/W1480106457","https://openalex.org/W1501669607","https://openalex.org/W1625255723","https://openalex.org/W1795776638","https://openalex.org/W1849007038","https://openalex.org/W1896424170","https://openalex.org/W1964442016","https://openalex.org/W1966385142","https://openalex.org/W1976235033","https://openalex.org/W1987048275","https://openalex.org/W2012237712","https://openalex.org/W2085662862","https://openalex.org/W2105101328","https://openalex.org/W2148154194","https://openalex.org/W2151103935","https://openalex.org/W2172000360","https://openalex.org/W2243226955","https://openalex.org/W2246249023","https://openalex.org/W2277498883","https://openalex.org/W2283758531","https://openalex.org/W2287059900","https://openalex.org/W2293804193","https://openalex.org/W2295579880","https://openalex.org/W2314395941","https://openalex.org/W2325939864","https://openalex.org/W2397302540","https://openalex.org/W3145293811","https://openalex.org/W6610017368"],"related_works":["https://openalex.org/W2990982991","https://openalex.org/W4317383455","https://openalex.org/W2548511587","https://openalex.org/W4293232884","https://openalex.org/W2422472940","https://openalex.org/W2019475500","https://openalex.org/W2548162870","https://openalex.org/W2138847091","https://openalex.org/W156213964","https://openalex.org/W3044690502"],"abstract_inverted_index":{"In":[0,48],"this":[1],"paper,":[2],"we":[3,51,81],"present":[4],"our":[5,49],"methods":[6],"to":[7,27,147],"the":[8,15,20,31,36,39,102,126,131,136,158,164],"Audio-Video":[9],"Based":[10],"Emotion":[11,17],"Recognition":[12,18],"subtask":[13],"in":[14,19,38],"2016":[16],"Wild":[21],"(EmotiW)":[22],"Challenge.":[23],"The":[24,64,95],"task":[25],"is":[26,99,125,135],"predict":[28],"one":[29],"of":[30,118,168],"seven":[32],"basic":[33],"emotions":[34],"for":[35,115,140],"characters":[37],"video":[40,61],"clips":[41],"extracted":[42],"from":[43,56],"movies":[44],"or":[45],"TV":[46],"shows.":[47],"approach,":[50],"explore":[52],"various":[53],"multimodal":[54],"features":[55,66,84,92,129,151],"audio,":[57],"facial":[58,132],"image":[59,78],"and":[60,73,86,90,111,130,152],"motion":[62,103],"modalities.":[63],"audio":[65],"contain":[67],"statistical":[68],"acoustic":[69,128],"features,":[70,80],"MFCC":[71,74,122],"Bag-of-Audio-Words":[72],"Fisher":[75],"Vectors.":[76],"For":[77],"related":[79,104],"extract":[82],"hand-crafted":[83],"(LBP-TOP":[85],"SPM":[87],"Dense":[88,97],"SIFT)":[89],"learned":[91],"(CNN":[93],"features).":[94],"improved":[96],"Trajectory":[98],"used":[100],"as":[101],"features.":[105],"We":[106,143],"train":[107],"SVM,":[108],"Random":[109],"Forest":[110],"Logistic":[112],"Regression":[113],"classifiers":[114],"each":[116],"kind":[117],"feature.":[119],"Among":[120],"them,":[121],"fisher":[123],"vector":[124],"best":[127],"CNN":[133],"feature":[134,139],"most":[137],"discriminative":[138],"emotion":[141],"recognition.":[142],"utilize":[144],"late":[145],"fusion":[146],"combine":[148],"different":[149],"modality":[150],"achieve":[153],"a":[154],"50.76%":[155],"accuracy":[156,167],"on":[157],"testing":[159],"set,":[160],"which":[161],"significantly":[162],"outperforms":[163],"baseline":[165],"test":[166],"40.47%.":[169]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
