{"id":"https://openalex.org/W2803166488","doi":"https://doi.org/10.1145/3115932","title":"Structure-Aware Multimodal Feature Fusion for RGB-D Scene Classification and Beyond","display_name":"Structure-Aware Multimodal Feature Fusion for RGB-D Scene Classification and Beyond","publication_year":2018,"publication_date":"2018-04-30","ids":{"openalex":"https://openalex.org/W2803166488","doi":"https://doi.org/10.1145/3115932","mag":"2803166488"},"language":"en","primary_location":{"id":"doi:10.1145/3115932","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3115932","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100608464","display_name":"Anran Wang","orcid":"https://orcid.org/0000-0002-1702-9323"},"institutions":[{"id":"https://openalex.org/I141568987","display_name":"Hong Kong Baptist University","ror":"https://ror.org/0145fw131","country_code":"HK","type":"education","lineage":["https://openalex.org/I141568987"]}],"countries":["HK"],"is_corresponding":true,"raw_author_name":"Anran Wang","raw_affiliation_strings":["Hong Kong Baptist University"],"affiliations":[{"raw_affiliation_string":"Hong Kong Baptist University","institution_ids":["https://openalex.org/I141568987"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100635804","display_name":"Jianfei Cai","orcid":"https://orcid.org/0000-0002-9444-3763"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Jianfei Cai","raw_affiliation_strings":["Nanyang Technological University, Singapore"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100460385","display_name":"Jiwen Lu","orcid":"https://orcid.org/0000-0002-6121-5529"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiwen Lu","raw_affiliation_strings":["Tsinghua University, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065578418","display_name":"Tat\u2010Jen Cham","orcid":"https://orcid.org/0000-0001-5264-2572"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Tat-Jen Cham","raw_affiliation_strings":["Nanyang Technological University, Singapore"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100608464"],"corresponding_institution_ids":["https://openalex.org/I141568987"],"apc_list":null,"apc_paid":null,"fwci":0.7312,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.76083939,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"14","issue":"2s","first_page":"1","last_page":"22"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12740","display_name":"Gait Recognition and Analysis","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7873737215995789},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.7446151971817017},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7067453265190125},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.6952078938484192},{"id":"https://openalex.org/keywords/rgb-color-model","display_name":"RGB color model","score":0.6382973194122314},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.6171377301216125},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5931172966957092},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.47485947608947754},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.4657348692417145},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.4153056740760803},{"id":"https://openalex.org/keywords/feature-vector","display_name":"Feature vector","score":0.411469042301178},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3521517515182495}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7873737215995789},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.7446151971817017},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7067453265190125},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.6952078938484192},{"id":"https://openalex.org/C82990744","wikidata":"https://www.wikidata.org/wiki/Q166194","display_name":"RGB color model","level":2,"score":0.6382973194122314},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.6171377301216125},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5931172966957092},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.47485947608947754},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.4657348692417145},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.4153056740760803},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.411469042301178},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3521517515182495},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3115932","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3115932","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"},{"id":"pmh:oai:dr.ntu.edu.sg:10356/138263","is_oa":false,"landing_page_url":"https://hdl.handle.net/10356/138263","pdf_url":null,"source":{"id":"https://openalex.org/S4306402609","display_name":"DR-NTU (Nanyang Technological University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I172675005","host_organization_name":"Nanyang Technological University","host_organization_lineage":["https://openalex.org/I172675005"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Journal Article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.7200000286102295,"id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320320709","display_name":"National Research Foundation Singapore","ror":"https://ror.org/03cpyc314"},{"id":"https://openalex.org/F4320320766","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":64,"referenced_works":["https://openalex.org/W78159342","https://openalex.org/W125693051","https://openalex.org/W130423592","https://openalex.org/W315880484","https://openalex.org/W1524680991","https://openalex.org/W1565402342","https://openalex.org/W1606858007","https://openalex.org/W1744759976","https://openalex.org/W1755205674","https://openalex.org/W1758727174","https://openalex.org/W1893516992","https://openalex.org/W1895914852","https://openalex.org/W1919033285","https://openalex.org/W1923026787","https://openalex.org/W1923184257","https://openalex.org/W1923332106","https://openalex.org/W1923404803","https://openalex.org/W1966385142","https://openalex.org/W1969366022","https://openalex.org/W1985238052","https://openalex.org/W1991367009","https://openalex.org/W2012592962","https://openalex.org/W2019660985","https://openalex.org/W2034014085","https://openalex.org/W2038765747","https://openalex.org/W2059042271","https://openalex.org/W2064675550","https://openalex.org/W2066813062","https://openalex.org/W2067912884","https://openalex.org/W2085411191","https://openalex.org/W2085735683","https://openalex.org/W2105101328","https://openalex.org/W2108598243","https://openalex.org/W2108661909","https://openalex.org/W2110819057","https://openalex.org/W2116742884","https://openalex.org/W2121058967","https://openalex.org/W2126574503","https://openalex.org/W2129812935","https://openalex.org/W2134670479","https://openalex.org/W2135046866","https://openalex.org/W2138019504","https://openalex.org/W2142194269","https://openalex.org/W2144567071","https://openalex.org/W2146055337","https://openalex.org/W2155284717","https://openalex.org/W2156303437","https://openalex.org/W2157751286","https://openalex.org/W2157982431","https://openalex.org/W2163292664","https://openalex.org/W2165260680","https://openalex.org/W2217325140","https://openalex.org/W2219747066","https://openalex.org/W2244252827","https://openalex.org/W2397854647","https://openalex.org/W2444163375","https://openalex.org/W2478657783","https://openalex.org/W2618530766","https://openalex.org/W2963173190","https://openalex.org/W2964164518","https://openalex.org/W3005708632","https://openalex.org/W3023742835","https://openalex.org/W4230005465","https://openalex.org/W4297683907"],"related_works":["https://openalex.org/W3208297503","https://openalex.org/W156213964","https://openalex.org/W3119773509","https://openalex.org/W2889153461","https://openalex.org/W2964117661","https://openalex.org/W4388405611","https://openalex.org/W2619127353","https://openalex.org/W2786094008","https://openalex.org/W3131501806","https://openalex.org/W2799683370"],"abstract_inverted_index":{"While":[0],"convolutional":[1],"neural":[2],"networks":[3],"(CNNs)":[4],"have":[5],"been":[6],"excellent":[7],"for":[8,28,158,206,213,258],"object":[9],"recognition,":[10,215,260],"the":[11,21,44,177,243,267,270],"greater":[12,39],"spatial":[13,40],"variability":[14],"in":[15,42],"scene":[16,29,114,173],"images":[17],"typically":[18],"means":[19],"that":[20,140,200,250],"standard":[22],"full-image":[23],"CNN":[24,51,66,165],"features":[25,67,120,162,232],"are":[26,68,124,135,168],"suboptimal":[27],"classification.":[30],"In":[31,128,211],"this":[32],"article,":[33],"we":[34,92,167,186,216],"investigate":[35],"a":[36,55,100],"framework":[37,192,202],"allowing":[38],"flexibility,":[41],"which":[43],"Fisher":[45],"vector":[46],"(FV)-encoded":[47],"distribution":[48],"of":[49,57,76,79,103,269],"local":[50],"features,":[52,166],"obtained":[53],"from":[54,70,87,121,233],"multitude":[56],"region":[58,104],"proposals":[59,105],"per":[60],"image,":[61],"is":[62,254],"considered":[63],"instead.":[64],"The":[65],"computed":[69],"an":[71,194],"augmented":[72],"pixel-wise":[73],"representation":[74],"consisting":[75],"multiple":[77],"modalities":[78,123,238],"RGB,":[80],"HHA,":[81],"and":[82,106,116,147,155,163,180,226,236,245],"surface":[83],"normals,":[84],"as":[85],"extracted":[86],"RGB-D":[88],"data.":[89],"More":[90],"significantly,":[91],"make":[93,230],"two":[94],"postulates:":[95],"(1)":[96],"component":[97],"sparsity\u2014that":[98],"only":[99],"small":[101],"variety":[102],"their":[107],"corresponding":[108],"FV":[109,161],"GMM":[110,145],"components":[111,146],"contribute":[112],"to":[113,126,144,170,198,220,229],"discriminability,":[115],"(2)":[117],"modal":[118],"nonsparsity\u2014that":[119],"all":[122],"encouraged":[125],"coexist.":[127,239],"our":[129,189,201,251],"proposed":[130],"feature":[131,190,252],"fusion":[132,191,253],"framework,":[133],"these":[134],"implemented":[136],"through":[137],"regularization":[138],"terms":[139],"apply":[141,188],"group":[142,149],"lasso":[143,150],"exclusive":[148],"across":[151],"modalities.":[152],"By":[153],"learning":[154],"combining":[156],"regressors":[157],"both":[159,234],"proposal-based":[160],"global":[164],"able":[169],"achieve":[171],"state-of-the-art":[172],"classification":[174],"performance":[175,264],"on":[176,193,242],"SUNRGBD":[178],"Dataset":[179,183],"NYU":[181],"Depth":[182],"V2.":[184],"Moreover,":[185],"further":[187],"action":[195,214,259],"recognition":[196],"task":[197],"demonstrate":[199],"can":[203],"be":[204],"generalized":[205],"other":[207],"multimodal":[208],"well-structured":[209],"features.":[210],"particular,":[212],"enforce":[217],"interpart":[218],"sparsity":[219],"choose":[221],"more":[222],"discriminative":[223],"body":[224],"parts,":[225],"intermodal":[227],"nonsparsity":[228],"informative":[231],"appearance":[235],"motion":[237],"Experimental":[240],"results":[241],"JHMDB":[244],"MPII":[246],"Cooking":[247],"Datasets":[248],"show":[249],"also":[255],"very":[256,262],"effective":[257],"achieving":[261],"competitive":[263],"compared":[265],"with":[266],"state":[268],"art.":[271]},"counts_by_year":[{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
