{"id":"https://openalex.org/W2895006884","doi":"https://doi.org/10.1145/3242969.3264992","title":"Multiple Spatio-temporal Feature Learning for Video-based Emotion Recognition in the Wild","display_name":"Multiple Spatio-temporal Feature Learning for Video-based Emotion Recognition in the Wild","publication_year":2018,"publication_date":"2018-10-02","ids":{"openalex":"https://openalex.org/W2895006884","doi":"https://doi.org/10.1145/3242969.3264992","mag":"2895006884"},"language":"en","primary_location":{"id":"doi:10.1145/3242969.3264992","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3242969.3264992","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 20th ACM International Conference on Multimodal Interaction","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5054796879","display_name":"Cheng Lu","orcid":"https://orcid.org/0000-0002-1477-1020"},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Cheng Lu","raw_affiliation_strings":["Southeast University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"Southeast University, Nanjing, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029771864","display_name":"Wenming Zheng","orcid":"https://orcid.org/0000-0002-7764-5179"},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenming Zheng","raw_affiliation_strings":["Southeast University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"Southeast University, Nanjing, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101437925","display_name":"Chaolong Li","orcid":"https://orcid.org/0000-0003-1092-6811"},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chaolong Li","raw_affiliation_strings":["Southeast University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"Southeast University, Nanjing, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038686056","display_name":"Chuangao Tang","orcid":"https://orcid.org/0000-0002-3653-136X"},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chuangao Tang","raw_affiliation_strings":["Southeast University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"Southeast University, Nanjing, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102760584","display_name":"Suyuan Liu","orcid":"https://orcid.org/0000-0003-1481-5393"},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Suyuan Liu","raw_affiliation_strings":["Southeast University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"Southeast University, Nanjing, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010114262","display_name":"Simeng Yan","orcid":null},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Simeng Yan","raw_affiliation_strings":["Southeast University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"Southeast University, Nanjing, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5027316177","display_name":"Yuan Zong","orcid":"https://orcid.org/0000-0002-0839-8792"},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuan Zong","raw_affiliation_strings":["Southeast University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"Southeast University, Nanjing, China","institution_ids":["https://openalex.org/I76569877"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5054796879"],"corresponding_institution_ids":["https://openalex.org/I76569877"],"apc_list":null,"apc_paid":null,"fwci":8.3269,"has_fulltext":false,"cited_by_count":76,"citation_normalized_percentile":{"value":0.97818214,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"646","last_page":"652"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9947999715805054,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8099088668823242},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7482835054397583},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.61020827293396},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5650976300239563},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5530857443809509},{"id":"https://openalex.org/keywords/spectrogram","display_name":"Spectrogram","score":0.5216324925422668},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.5012025833129883},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4400838315486908},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.43108898401260376},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.4272609055042267},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.4167136549949646},{"id":"https://openalex.org/keywords/facial-expression","display_name":"Facial expression","score":0.41362959146499634},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.41080084443092346}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8099088668823242},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7482835054397583},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.61020827293396},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5650976300239563},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5530857443809509},{"id":"https://openalex.org/C45273575","wikidata":"https://www.wikidata.org/wiki/Q578970","display_name":"Spectrogram","level":2,"score":0.5216324925422668},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.5012025833129883},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4400838315486908},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.43108898401260376},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.4272609055042267},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.4167136549949646},{"id":"https://openalex.org/C195704467","wikidata":"https://www.wikidata.org/wiki/Q327968","display_name":"Facial expression","level":2,"score":0.41362959146499634},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.41080084443092346},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3242969.3264992","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3242969.3264992","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 20th ACM International Conference on Multimodal Interaction","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.7099999785423279}],"awards":[{"id":"https://openalex.org/G7315528787","display_name":null,"funder_award_id":"61572009","funder_id":"https://openalex.org/F4320335595","funder_display_name":"National Natural Science Foundation of China-Yunnan Joint Fund"}],"funders":[{"id":"https://openalex.org/F4320335595","display_name":"National Natural Science Foundation of China-Yunnan Joint Fund","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W179875071","https://openalex.org/W1522734439","https://openalex.org/W1686810756","https://openalex.org/W1974932989","https://openalex.org/W1981918162","https://openalex.org/W1983364832","https://openalex.org/W2032254851","https://openalex.org/W2064675550","https://openalex.org/W2074788634","https://openalex.org/W2079735306","https://openalex.org/W2085662862","https://openalex.org/W2108598243","https://openalex.org/W2132719147","https://openalex.org/W2149933564","https://openalex.org/W2163605009","https://openalex.org/W2194775991","https://openalex.org/W2277498883","https://openalex.org/W2325939864","https://openalex.org/W2341528187","https://openalex.org/W2523915246","https://openalex.org/W2546875627","https://openalex.org/W2548264631","https://openalex.org/W2767618761","https://openalex.org/W2769114491","https://openalex.org/W2888683367","https://openalex.org/W2963252191","https://openalex.org/W3101998545"],"related_works":["https://openalex.org/W2530685530","https://openalex.org/W4375868962","https://openalex.org/W2088854863","https://openalex.org/W2011227383","https://openalex.org/W1976719989","https://openalex.org/W2942893872","https://openalex.org/W2065606036","https://openalex.org/W3179495260","https://openalex.org/W3127543252","https://openalex.org/W2016904525"],"abstract_inverted_index":{"The":[0,23,87],"difficulty":[1],"of":[2,91,112,161,180,192,233,261,276,287,306],"emotion":[3,51,277],"recognition":[4,52,278],"in":[5,26,53,71,125,184,222,242,309],"the":[6,37,44,49,82,94,99,105,130,137,152,159,168,190,207,227,230,248,258,267,274,284,300,304],"wild":[7],"(EmotiW)":[8],"is":[9,39,89,213,270,291],"how":[10],"to":[11,16,40,104,118,167,176,189,211,216,246,272],"train":[12],"a":[13,57,185,243,254,295],"robust":[14],"model":[15,97],"deal":[17],"with":[18,32,151,257,299],"diverse":[19],"scenarios":[20],"and":[21,36,73,85,98,145,302],"anomalies.":[22],"Audio-video":[24],"Sub-challenge":[25],"EmotiW":[27],"contains":[28],"audio-video":[29],"short":[30],"clips":[31],"several":[33],"emotional":[34,69,219],"labels":[35],"task":[38],"distinguish":[41],"which":[42,64,147,293],"label":[43],"video":[45],"belongs":[46],"to.":[47],"For":[48,226],"better":[50],"videos,":[54],"we":[55],"propose":[56],"multiple":[58,223],"spatio-temporal":[59,195,263],"feature":[60],"fusion":[61,255],"(MSFF)":[62],"framework,":[63],"can":[65],"more":[66,251],"accurately":[67],"depict":[68],"information":[70,220],"spatial":[72,132],"temporal":[74],"dimensions":[75],"by":[76,136,155,205,236],"two":[77,92],"mutually":[78],"complementary":[79],"sources,":[80],"including":[81,143],"facial":[83,95,106,126,181],"image":[84,96,107],"audio.":[86],"framework":[88,245,269],"consisted":[90],"parts:":[93],"audio":[100,228],"model.":[101],"With":[102],"respect":[103],"model,":[108,229],"three":[109],"different":[110,123,262],"architectures":[111],"spatial-temporal":[113],"neural":[114,140],"networks":[115,141,264],"are":[116,134,148,164,239],"employed":[117],"extract":[119],"discriminative":[120],"features":[121,133,160],"about":[122],"emotions":[124],"expression":[127],"images.":[128],"Firstly,":[129],"high-level":[131],"obtained":[135],"pre-trained":[138],"convolutional":[139],"(CNN),":[142],"VGG-Face":[144],"ResNet-50":[146],"all":[149,162],"fed":[150],"images":[153,232],"generated":[154,235],"each":[156],"video.":[157,186],"Then,":[158],"frames":[163],"sequentially":[165],"input":[166],"Bi-directional":[169],"Long":[170],"Short-Term":[171],"Memory":[172],"(BLSTM)":[173],"so":[174],"as":[175],"capture":[177],"dynamic":[178],"variations":[179],"appearance":[182],"textures":[183],"In":[187],"addition":[188],"structure":[191],"CNN-RNN,":[193],"another":[194],"network,":[196],"namely":[197],"deep":[198],"3-Dimensional":[199],"Convolutional":[200],"Neural":[201],"Networks":[202],"(3D":[203],"CNN)":[204],"extending":[206],"2D":[208],"convolution":[209],"kernel":[210],"3D,":[212],"also":[214,240],"applied":[215],"attain":[217],"evolving":[218],"encoded":[221],"adjacent":[224],"frames.":[225],"spectrogram":[231],"speech":[234],"preprocessing":[237],"audio,":[238],"modeled":[241],"VGG-BLSTM":[244],"characterize":[247],"affective":[249],"fluctuation":[250],"efficiently.":[252],"Finally,":[253],"strategy":[256],"score":[259],"matrices":[260],"gained":[265],"from":[266],"above":[268],"proposed":[271,289],"boost":[273],"performance":[275],"complementally.":[279],"Extensive":[280],"experiments":[281],"show":[282],"that":[283],"overall":[285],"accuracy":[286],"our":[288],"MSFF":[290],"60.64%,":[292],"achieves":[294],"large":[296],"improvement":[297],"compared":[298],"baseline":[301],"outperform":[303],"result":[305],"champion":[307],"team":[308],"2017.":[310]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":11},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":9},{"year":2021,"cited_by_count":13},{"year":2020,"cited_by_count":9},{"year":2019,"cited_by_count":14},{"year":2018,"cited_by_count":2}],"updated_date":"2026-04-23T09:07:50.710637","created_date":"2025-10-10T00:00:00"}
