{"id":"https://openalex.org/W2526449353","doi":"https://doi.org/10.1145/2964284.2964297","title":"Multilayer and Multimodal Fusion of Deep Neural Networks for Video Classification","display_name":"Multilayer and Multimodal Fusion of Deep Neural Networks for Video Classification","publication_year":2016,"publication_date":"2016-09-29","ids":{"openalex":"https://openalex.org/W2526449353","doi":"https://doi.org/10.1145/2964284.2964297","mag":"2526449353"},"language":"en","primary_location":{"id":"doi:10.1145/2964284.2964297","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2964284.2964297","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 24th ACM international conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100755421","display_name":"Xiaodong Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Xiaodong Yang","raw_affiliation_strings":["NVIDIA, Santa Clara, CA, USA"],"affiliations":[{"raw_affiliation_string":"NVIDIA, Santa Clara, CA, USA","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066945976","display_name":"Pavlo Molchanov","orcid":null},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pavlo Molchanov","raw_affiliation_strings":["NVIDIA, Santa Clara, CA, USA"],"affiliations":[{"raw_affiliation_string":"NVIDIA, Santa Clara, CA, USA","institution_ids":["https://openalex.org/I4210127875"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056503617","display_name":"Jan Kautz","orcid":"https://orcid.org/0000-0002-8830-429X"},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jan Kautz","raw_affiliation_strings":["NVIDIA, Westford, MA, USA"],"affiliations":[{"raw_affiliation_string":"NVIDIA, Westford, MA, USA","institution_ids":["https://openalex.org/I4210127875"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100755421"],"corresponding_institution_ids":["https://openalex.org/I4210127875"],"apc_list":null,"apc_paid":null,"fwci":8.4683,"has_fulltext":false,"cited_by_count":113,"citation_normalized_percentile":{"value":0.98541777,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"978","last_page":"987"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.809005618095398},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6758512854576111},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6694108247756958},{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.6096251606941223},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.6041763424873352},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.5640645027160645},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5414862632751465},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.5311948657035828},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.49602898955345154},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.48966649174690247},{"id":"https://openalex.org/keywords/abstraction","display_name":"Abstraction","score":0.41992273926734924},{"id":"https://openalex.org/keywords/fusion-mechanism","display_name":"Fusion mechanism","score":0.41093912720680237},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.37222927808761597},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.3665919601917267},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.29019981622695923}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.809005618095398},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6758512854576111},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6694108247756958},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.6096251606941223},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.6041763424873352},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.5640645027160645},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5414862632751465},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.5311948657035828},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.49602898955345154},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.48966649174690247},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.41992273926734924},{"id":"https://openalex.org/C173414695","wikidata":"https://www.wikidata.org/wiki/Q5510276","display_name":"Fusion mechanism","level":4,"score":0.41093912720680237},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.37222927808761597},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3665919601917267},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.29019981622695923},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C103038307","wikidata":"https://www.wikidata.org/wiki/Q6556360","display_name":"Lipid bilayer fusion","level":3,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2964284.2964297","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2964284.2964297","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 24th ACM international conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":64,"referenced_works":["https://openalex.org/W24089286","https://openalex.org/W300523764","https://openalex.org/W337346652","https://openalex.org/W764651262","https://openalex.org/W914561379","https://openalex.org/W1566135517","https://openalex.org/W1578080815","https://openalex.org/W1604621531","https://openalex.org/W1606858007","https://openalex.org/W1686810756","https://openalex.org/W1849277567","https://openalex.org/W1867429401","https://openalex.org/W1871385855","https://openalex.org/W1903029394","https://openalex.org/W1923404803","https://openalex.org/W1944615693","https://openalex.org/W1947481528","https://openalex.org/W1948751323","https://openalex.org/W1950136256","https://openalex.org/W1964514974","https://openalex.org/W1980176153","https://openalex.org/W1983364832","https://openalex.org/W1987739276","https://openalex.org/W2005750530","https://openalex.org/W2005876975","https://openalex.org/W2012592962","https://openalex.org/W2016053056","https://openalex.org/W2020163092","https://openalex.org/W2025954386","https://openalex.org/W2026355154","https://openalex.org/W2027922120","https://openalex.org/W2048783874","https://openalex.org/W2067646051","https://openalex.org/W2109235804","https://openalex.org/W2112020727","https://openalex.org/W2116435618","https://openalex.org/W2118585731","https://openalex.org/W2122476475","https://openalex.org/W2124372976","https://openalex.org/W2126579184","https://openalex.org/W2139594308","https://openalex.org/W2142194269","https://openalex.org/W2145295623","https://openalex.org/W2147768505","https://openalex.org/W2155490028","https://openalex.org/W2156303437","https://openalex.org/W2163605009","https://openalex.org/W2184188583","https://openalex.org/W2284126738","https://openalex.org/W2395611524","https://openalex.org/W2401154299","https://openalex.org/W2613718673","https://openalex.org/W2618530766","https://openalex.org/W2949650786","https://openalex.org/W2952453038","https://openalex.org/W2953106684","https://openalex.org/W2953130678","https://openalex.org/W2963355447","https://openalex.org/W6600983433","https://openalex.org/W6677656871","https://openalex.org/W6678029491","https://openalex.org/W6686207219","https://openalex.org/W6687483927","https://openalex.org/W6717177737"],"related_works":["https://openalex.org/W2125652721","https://openalex.org/W1540371141","https://openalex.org/W1549363203","https://openalex.org/W2154063878","https://openalex.org/W4231274751","https://openalex.org/W2556012038","https://openalex.org/W1489772951","https://openalex.org/W1538046993","https://openalex.org/W2571255492","https://openalex.org/W4284687509"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"a":[3,22,28,37,58,88,119],"novel":[4],"framework":[5],"to":[6,25,67,92,109],"combine":[7],"multiple":[8,75,114],"layers":[9,45,98,115],"and":[10,34,42,71,116,136],"modalities":[11,66,117],"of":[12,30,32,113],"deep":[13],"neural":[14],"networks":[15],"for":[16,80],"video":[17],"classification.":[18],"We":[19,55],"first":[20],"propose":[21,87],"multilayer":[23],"strategy":[24],"simultaneously":[26],"capture":[27],"variety":[29],"levels":[31],"abstraction":[33],"invariance":[35],"in":[36,118],"network,":[38],"where":[39],"the":[40,82,111,123],"convolutional":[41],"fully":[43,96],"connected":[44,97],"are":[46],"effectively":[47,93],"represented":[48],"by":[49],"our":[50],"proposed":[51],"feature":[52],"aggregation":[53],"methods.":[54],"further":[56],"introduce":[57],"multimodal":[59],"scheme":[60],"that":[61],"includes":[62],"four":[63],"highly":[64],"complementary":[65],"extract":[68],"diverse":[69],"static":[70],"dynamic":[72],"cues":[73],"at":[74],"temporal":[76,84],"scales.":[77],"In":[78,122],"particular,":[79],"modeling":[81],"long-term":[83],"information,":[85],"we":[86,126],"new":[89],"structure,":[90],"FC-RNN,":[91],"transform":[94],"pre-trained":[95],"into":[99],"recurrent":[100],"layers.":[101],"A":[102],"robust":[103],"boosting":[104],"model":[105],"is":[106],"then":[107],"introduced":[108],"optimize":[110],"fusion":[112],"unified":[120],"way.":[121],"extensive":[124],"experiments,":[125],"achieve":[127],"state-of-the-art":[128],"results":[129],"on":[130],"two":[131],"public":[132],"benchmark":[133],"datasets:":[134],"UCF101":[135],"HMDB51.":[137]},"counts_by_year":[{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":9},{"year":2022,"cited_by_count":14},{"year":2021,"cited_by_count":15},{"year":2020,"cited_by_count":11},{"year":2019,"cited_by_count":19},{"year":2018,"cited_by_count":23},{"year":2017,"cited_by_count":8}],"updated_date":"2026-03-15T09:29:46.208133","created_date":"2025-10-10T00:00:00"}
