{"id":"https://openalex.org/W2703895418","doi":"https://doi.org/10.1109/tcsvt.2017.2719043","title":"Learning Affective Features With a Hybrid Deep Model for Audio\u2013Visual Emotion Recognition","display_name":"Learning Affective Features With a Hybrid Deep Model for Audio\u2013Visual Emotion Recognition","publication_year":2017,"publication_date":"2017-06-23","ids":{"openalex":"https://openalex.org/W2703895418","doi":"https://doi.org/10.1109/tcsvt.2017.2719043","mag":"2703895418"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2017.2719043","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2017.2719043","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101891025","display_name":"Shiqing Zhang","orcid":"https://orcid.org/0000-0001-8184-5088"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I82760581","display_name":"Taizhou University","ror":"https://ror.org/04fzhyx73","country_code":"CN","type":"education","lineage":["https://openalex.org/I82760581"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shiqing Zhang","raw_affiliation_strings":["Institute of Digital Media, School of Electronic Engineering and Computer Science, Peking University, Beijing, China","Institute of Intelligent Information Processing, Taizhou University, Taizhou, China"],"affiliations":[{"raw_affiliation_string":"Institute of Digital Media, School of Electronic Engineering and Computer Science, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"Institute of Intelligent Information Processing, Taizhou University, Taizhou, China","institution_ids":["https://openalex.org/I82760581"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055433405","display_name":"Shiliang Zhang","orcid":"https://orcid.org/0000-0001-9053-9314"},"institutions":[{"id":"https://openalex.org/I82760581","display_name":"Taizhou University","ror":"https://ror.org/04fzhyx73","country_code":"CN","type":"education","lineage":["https://openalex.org/I82760581"]},{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shiliang Zhang","raw_affiliation_strings":["Institute of Digital Media, School of Electronic Engineering and Computer Science, Peking University, Beijing, China","Institute of Intelligent Information Processing, Taizhou University, Taizhou, China"],"affiliations":[{"raw_affiliation_string":"Institute of Digital Media, School of Electronic Engineering and Computer Science, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"Institute of Intelligent Information Processing, Taizhou University, Taizhou, China","institution_ids":["https://openalex.org/I82760581"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058066577","display_name":"Tiejun Huang","orcid":"https://orcid.org/0000-0002-4234-6099"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tiejun Huang","raw_affiliation_strings":["Institute of Digital Media, School of Electronic Engineering and Computer Science, Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Digital Media, School of Electronic Engineering and Computer Science, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018478553","display_name":"Wen Gao","orcid":"https://orcid.org/0000-0002-8070-802X"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wen Gao","raw_affiliation_strings":["Institute of Digital Media, School of Electronic Engineering and Computer Science, Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Digital Media, School of Electronic Engineering and Computer Science, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5111504451","display_name":"Qi Tian","orcid":"https://orcid.org/0009-0003-2676-5300"},"institutions":[{"id":"https://openalex.org/I45438204","display_name":"The University of Texas at San Antonio","ror":"https://ror.org/01kd65564","country_code":"US","type":"education","lineage":["https://openalex.org/I45438204"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Qi Tian","raw_affiliation_strings":["Department of Computer Science, The University of Texas at San Antonio, San Antonio, TX, USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, The University of Texas at San Antonio, San Antonio, TX, USA","institution_ids":["https://openalex.org/I45438204"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101891025"],"corresponding_institution_ids":["https://openalex.org/I20231570","https://openalex.org/I82760581"],"apc_list":null,"apc_paid":null,"fwci":20.5301,"has_fulltext":false,"cited_by_count":342,"citation_normalized_percentile":{"value":0.99657464,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"28","issue":"10","first_page":"3030","last_page":"3043"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.9943000078201294,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9904000163078308,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6832258701324463},{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.561165452003479},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5598656535148621},{"id":"https://openalex.org/keywords/emotion-recognition","display_name":"Emotion recognition","score":0.5281481742858887},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4662289619445801},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.46237727999687195},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.42576563358306885},{"id":"https://openalex.org/keywords/cognitive-psychology","display_name":"Cognitive psychology","score":0.3286283314228058},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.32760193943977356},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.31430619955062866},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.24664410948753357}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6832258701324463},{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.561165452003479},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5598656535148621},{"id":"https://openalex.org/C2777438025","wikidata":"https://www.wikidata.org/wiki/Q1339090","display_name":"Emotion recognition","level":2,"score":0.5281481742858887},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4662289619445801},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.46237727999687195},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.42576563358306885},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.3286283314228058},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.32760193943977356},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.31430619955062866},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.24664410948753357}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2017.2719043","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2017.2719043","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.7699999809265137}],"awards":[{"id":"https://openalex.org/G4263706455","display_name":null,"funder_award_id":"91538111","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5353053316","display_name":null,"funder_award_id":"LY16F020011","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6197875348","display_name":null,"funder_award_id":"61620106009","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6372974985","display_name":null,"funder_award_id":"61429201","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7612298304","display_name":null,"funder_award_id":"61572050","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":76,"referenced_works":["https://openalex.org/W99485238","https://openalex.org/W147964346","https://openalex.org/W1522734439","https://openalex.org/W1578856370","https://openalex.org/W1651753422","https://openalex.org/W1922126009","https://openalex.org/W1930223417","https://openalex.org/W1976066595","https://openalex.org/W1992789203","https://openalex.org/W1995562189","https://openalex.org/W2009059481","https://openalex.org/W2020676607","https://openalex.org/W2026243162","https://openalex.org/W2038821742","https://openalex.org/W2050748687","https://openalex.org/W2055332436","https://openalex.org/W2056030034","https://openalex.org/W2056907797","https://openalex.org/W2064675550","https://openalex.org/W2066200210","https://openalex.org/W2068124150","https://openalex.org/W2071249869","https://openalex.org/W2074788634","https://openalex.org/W2075953807","https://openalex.org/W2080289724","https://openalex.org/W2091007025","https://openalex.org/W2095540482","https://openalex.org/W2096948193","https://openalex.org/W2100495367","https://openalex.org/W2109774206","https://openalex.org/W2112796928","https://openalex.org/W2116064496","https://openalex.org/W2123099218","https://openalex.org/W2124773960","https://openalex.org/W2125113755","https://openalex.org/W2126552487","https://openalex.org/W2136922672","https://openalex.org/W2139916508","https://openalex.org/W2143350951","https://openalex.org/W2145310492","https://openalex.org/W2156503193","https://openalex.org/W2157297238","https://openalex.org/W2158061940","https://openalex.org/W2158198839","https://openalex.org/W2158275940","https://openalex.org/W2163026698","https://openalex.org/W2163605009","https://openalex.org/W2164699598","https://openalex.org/W2168692779","https://openalex.org/W2168854967","https://openalex.org/W2172166488","https://openalex.org/W2184188583","https://openalex.org/W2184902314","https://openalex.org/W2234589100","https://openalex.org/W2243226955","https://openalex.org/W2332352554","https://openalex.org/W2336160298","https://openalex.org/W2338772450","https://openalex.org/W2394859829","https://openalex.org/W2400826247","https://openalex.org/W2409534643","https://openalex.org/W2508202655","https://openalex.org/W2510815162","https://openalex.org/W2519803806","https://openalex.org/W2520433280","https://openalex.org/W2548128734","https://openalex.org/W2586286573","https://openalex.org/W2952432176","https://openalex.org/W3097096317","https://openalex.org/W3141819983","https://openalex.org/W6602113894","https://openalex.org/W6604030209","https://openalex.org/W6684191040","https://openalex.org/W6685405536","https://openalex.org/W6686207219","https://openalex.org/W6726951882"],"related_works":["https://openalex.org/W2271369634","https://openalex.org/W2350550760","https://openalex.org/W578794879","https://openalex.org/W2625296515","https://openalex.org/W3137890128","https://openalex.org/W2068608913","https://openalex.org/W4245955731","https://openalex.org/W2393726419","https://openalex.org/W2380912101","https://openalex.org/W3126677997"],"abstract_inverted_index":{"Emotion":[0],"recognition":[1,89],"is":[2,66,121,151,194],"challenging":[3],"due":[4],"to":[5,28,91,123,139],"the":[6,16,30,100,166,170,175,180,184,188],"emotional":[7,31,163],"gap":[8,32],"between":[9],"emotions":[10],"and":[11,50,73,81,94,104,174,200,206],"audio-visual":[12,42,54,128,162,209],"features.":[13],"Motivated":[14],"by":[15,33,137],"powerful":[17],"feature":[18,130],"learning":[19],"ability":[20],"of":[21,102,183,190],"deep":[22,37],"neural":[23],"networks,":[24],"this":[25,193],"paper":[26],"proposes":[27],"bridge":[29],"using":[34],"a":[35,58,110,115,126,141,146],"hybrid":[36],"model,":[38],"which":[39],"first":[40],"produces":[41],"segment":[43,55,96,129,134],"features":[44,56,135],"with":[45,114,203],"Convolutional":[46],"Neural":[47],"Networks":[48,61],"(CNNs)":[49],"3D-CNN,":[51,205],"then":[52],"fuses":[53],"in":[57,68],"Deep":[59],"Belief":[60],"(DBNs).":[62],"The":[63,118],"proposed":[64,185],"method":[65],"trained":[67,122],"two":[69],"stages.":[70],"First,":[71],"CNN":[72,103],"3D-CNN":[74,105],"models":[75,106],"pre-trained":[76],"on":[77,87,159],"corresponding":[78],"large-scale":[79],"image":[80],"video":[82,144,154],"classification":[83],"tasks":[84,90],"are":[85,107],"fine-tuned":[86],"emotion":[88,155,210],"learn":[92,125],"audio":[93,199],"visual":[95,201],"features,":[97],"respectively.":[98],"Second,":[99],"outputs":[101],"combined":[108],"into":[109],"fusion":[111,119],"network":[112,120],"built":[113],"DBN":[116,138,207],"model.":[117],"jointly":[124],"discriminative":[127],"representation.":[131],"After":[132],"average-pooling":[133],"learned":[136],"form":[140],"fixed-length":[142],"global":[143],"feature,":[145],"linear":[147],"Support":[148],"Vector":[149],"Machine":[150],"used":[152],"for":[153,208],"classification.":[156],"Experimental":[157],"results":[158],"three":[160],"public":[161],"databases,":[164],"including":[165],"acted":[167,171],"RML":[168],"database,":[169,173,178],"eNTERFACE05":[172],"spontaneous":[176],"BAUM-1s":[177],"demonstrate":[179],"promising":[181],"performance":[182],"method.":[186],"To":[187],"best":[189],"our":[191],"knowledge,":[192],"an":[195],"early":[196],"work":[197],"fusing":[198],"cues":[202],"CNN,":[204],"recognition.":[211]},"counts_by_year":[{"year":2026,"cited_by_count":8},{"year":2025,"cited_by_count":27},{"year":2024,"cited_by_count":40},{"year":2023,"cited_by_count":45},{"year":2022,"cited_by_count":57},{"year":2021,"cited_by_count":55},{"year":2020,"cited_by_count":43},{"year":2019,"cited_by_count":44},{"year":2018,"cited_by_count":22},{"year":2017,"cited_by_count":1}],"updated_date":"2026-03-09T08:58:05.943551","created_date":"2025-10-10T00:00:00"}
