{"id":"https://openalex.org/W4313476633","doi":"https://doi.org/10.1145/3576857","title":"A Novel Lightweight Audio-visual Saliency Model for Videos","display_name":"A Novel Lightweight Audio-visual Saliency Model for Videos","publication_year":2022,"publication_date":"2022-12-16","ids":{"openalex":"https://openalex.org/W4313476633","doi":"https://doi.org/10.1145/3576857"},"language":"en","primary_location":{"id":"doi:10.1145/3576857","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3576857","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002554183","display_name":"Dandan Zhu","orcid":"https://orcid.org/0000-0003-0329-6321"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]},{"id":"https://openalex.org/I66867065","display_name":"East China Normal University","ror":"https://ror.org/02n96ep67","country_code":"CN","type":"education","lineage":["https://openalex.org/I66867065"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Dandan Zhu","raw_affiliation_strings":["Institute of AI Education, Shanghai, East China Normal University and Key Laboratory of Artificial Intelligence, Ministry of Education, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Institute of AI Education, Shanghai, East China Normal University and Key Laboratory of Artificial Intelligence, Ministry of Education, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I66867065","https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039990525","display_name":"Xuan Shao","orcid":"https://orcid.org/0000-0002-4096-9428"},"institutions":[{"id":"https://openalex.org/I181326427","display_name":"Donghua University","ror":"https://ror.org/035psfh38","country_code":"CN","type":"education","lineage":["https://openalex.org/I181326427"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuan Shao","raw_affiliation_strings":["School of Computer Science and Technology, Donghua University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Donghua University, Shanghai, China","institution_ids":["https://openalex.org/I181326427"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085080585","display_name":"Qiangqiang Zhou","orcid":"https://orcid.org/0000-0002-5717-3290"},"institutions":[{"id":"https://openalex.org/I53592917","display_name":"Jiangxi Normal University","ror":"https://ror.org/05nkgk822","country_code":"CN","type":"education","lineage":["https://openalex.org/I53592917"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qiangqiang Zhou","raw_affiliation_strings":["School of Software, Jiangxi Normal University, Ziyang Avenue, Nanchang"],"affiliations":[{"raw_affiliation_string":"School of Software, Jiangxi Normal University, Ziyang Avenue, Nanchang","institution_ids":["https://openalex.org/I53592917"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043405654","display_name":"Xiongkuo Min","orcid":"https://orcid.org/0000-0001-5693-0416"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiongkuo Min","raw_affiliation_strings":["Institute of Image Communication and Network Engineering, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Institute of Image Communication and Network Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064168853","display_name":"Guangtao Zhai","orcid":"https://orcid.org/0000-0001-8165-9322"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guangtao Zhai","raw_affiliation_strings":["Institute of Image Communication and Network Engineering, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Institute of Image Communication and Network Engineering, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5019708391","display_name":"Xiaokang Yang","orcid":"https://orcid.org/0000-0003-4029-3322"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaokang Yang","raw_affiliation_strings":["Key Laboratory of Artificial Intelligence, Ministry of Education, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Key Laboratory of Artificial Intelligence, Ministry of Education, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5002554183"],"corresponding_institution_ids":["https://openalex.org/I183067930","https://openalex.org/I66867065"],"apc_list":null,"apc_paid":null,"fwci":1.0214,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.77800318,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"19","issue":"4","first_page":"1","last_page":"22"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12032","display_name":"Multisensory perception and integration","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.9914000034332275,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9028549790382385},{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.6646614670753479},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5941790342330933},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.47829538583755493},{"id":"https://openalex.org/keywords/spatial-analysis","display_name":"Spatial analysis","score":0.45470884442329407},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.43179383873939514},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.42818158864974976},{"id":"https://openalex.org/keywords/human-visual-system-model","display_name":"Human visual system model","score":0.41676396131515503},{"id":"https://openalex.org/keywords/sensory-cue","display_name":"Sensory cue","score":0.41138333082199097},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4063138961791992},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.38916879892349243},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3840543031692505},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.10709074139595032},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.08952367305755615}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9028549790382385},{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.6646614670753479},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5941790342330933},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.47829538583755493},{"id":"https://openalex.org/C159620131","wikidata":"https://www.wikidata.org/wiki/Q1938983","display_name":"Spatial analysis","level":2,"score":0.45470884442329407},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.43179383873939514},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.42818158864974976},{"id":"https://openalex.org/C160086991","wikidata":"https://www.wikidata.org/wiki/Q5939193","display_name":"Human visual system model","level":3,"score":0.41676396131515503},{"id":"https://openalex.org/C111370547","wikidata":"https://www.wikidata.org/wiki/Q7451120","display_name":"Sensory cue","level":2,"score":0.41138333082199097},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4063138961791992},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.38916879892349243},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3840543031692505},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.10709074139595032},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.08952367305755615},{"id":"https://openalex.org/C62649853","wikidata":"https://www.wikidata.org/wiki/Q199687","display_name":"Remote sensing","level":1,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3576857","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3576857","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8084839836","display_name":null,"funder_award_id":"62001289","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":68,"referenced_works":["https://openalex.org/W191494004","https://openalex.org/W774034202","https://openalex.org/W1152443276","https://openalex.org/W1772076007","https://openalex.org/W1903029394","https://openalex.org/W1947031653","https://openalex.org/W1978479866","https://openalex.org/W2002781701","https://openalex.org/W2004294009","https://openalex.org/W2015394094","https://openalex.org/W2027179862","https://openalex.org/W2032007016","https://openalex.org/W2039313011","https://openalex.org/W2047670868","https://openalex.org/W2094432440","https://openalex.org/W2099329509","https://openalex.org/W2119577735","https://openalex.org/W2135835174","https://openalex.org/W2148154194","https://openalex.org/W2155479901","https://openalex.org/W2161025448","https://openalex.org/W2164084182","https://openalex.org/W2184568381","https://openalex.org/W2186222003","https://openalex.org/W2212216676","https://openalex.org/W2288514685","https://openalex.org/W2334831412","https://openalex.org/W2338972621","https://openalex.org/W2471855951","https://openalex.org/W2498738402","https://openalex.org/W2529272619","https://openalex.org/W2533370895","https://openalex.org/W2544224704","https://openalex.org/W2558906385","https://openalex.org/W2561270043","https://openalex.org/W2588443678","https://openalex.org/W2612135493","https://openalex.org/W2623012778","https://openalex.org/W2741835523","https://openalex.org/W2757028014","https://openalex.org/W2780708736","https://openalex.org/W2807746031","https://openalex.org/W2889942734","https://openalex.org/W2904099109","https://openalex.org/W2912100719","https://openalex.org/W2922086303","https://openalex.org/W2930163499","https://openalex.org/W2939124210","https://openalex.org/W2946259291","https://openalex.org/W2962965915","https://openalex.org/W2963503775","https://openalex.org/W2963581854","https://openalex.org/W2963685207","https://openalex.org/W2964114039","https://openalex.org/W2969741484","https://openalex.org/W2980565715","https://openalex.org/W2986131415","https://openalex.org/W3022565501","https://openalex.org/W3034287518","https://openalex.org/W3041053424","https://openalex.org/W3095348033","https://openalex.org/W3101840568","https://openalex.org/W3211965499","https://openalex.org/W4239147634","https://openalex.org/W4242177601","https://openalex.org/W4248927948","https://openalex.org/W4293665662","https://openalex.org/W4301409532"],"related_works":["https://openalex.org/W2271369634","https://openalex.org/W3147472394","https://openalex.org/W2047100085","https://openalex.org/W2350550760","https://openalex.org/W578794879","https://openalex.org/W2625296515","https://openalex.org/W3137890128","https://openalex.org/W1984634519","https://openalex.org/W4245955731","https://openalex.org/W2393726419"],"abstract_inverted_index":{"Audio":[0],"information":[1,25,54],"has":[2],"not":[3],"been":[4],"considered":[5],"an":[6,88,122],"important":[7],"factor":[8],"in":[9,26,180],"visual":[10,29,34,39,99,144,155],"attention":[11,35],"models":[12,36],"regardless":[13],"of":[14,23,74,176,182],"many":[15],"psychological":[16],"studies":[17,172],"that":[18],"have":[19],"shown":[20],"the":[21,27,52,72,80,93,104,111,138,153,163,174,177],"importance":[22],"audio":[24,85,116,141],"human":[28],"perception":[30],"system.":[31],"Since":[32],"existing":[33],"only":[37],"utilize":[38,84],"information,":[40],"their":[41],"performance":[42,175],"is":[43,79],"limited":[44,53],"but":[45],"also":[46],"requires":[47],"high-computational":[48],"complexity":[49],"due":[50],"to":[51,83,161],"available.":[55],"To":[56,71],"overcome":[57],"these":[58],"problems,":[59],"we":[60],"propose":[61],"a":[62,148],"lightweight":[63,105,124],"audio-visual":[64,164],"saliency":[65,95,158,165],"(LAVS)":[66],"model":[67,91,179],"for":[68,87,92],"video":[69,94],"sequences.":[70],"best":[73],"our":[75],"knowledge,":[76],"this":[77],"article":[78],"first":[81],"trial":[82],"cues":[86],"efficient":[89],"deep-learning":[90],"estimation.":[96],"First,":[97],"spatial-temporal":[98,143,149,154],"features":[100,117],"are":[101,118,159],"extracted":[102,119],"by":[103,120],"receptive":[106],"field":[107],"block":[108],"(RFB)":[109],"with":[110],"bidirectional":[112],"ConvLSTM":[113],"units.":[114],"Then,":[115],"using":[121],"improved":[123],"environment":[125],"sound":[126],"classification":[127],"model.":[128],"Subsequently,":[129],"deep":[130],"canonical":[131],"correlation":[132],"analysis":[133],"(DCCA)":[134],"aims":[135],"at":[136],"capturing":[137],"correspondence":[139],"between":[140],"and":[142,156,170,184],"features,":[145],"thus":[146],"obtaining":[147],"auditory":[150,157],"saliency.":[151],"Lastly,":[152],"fused":[160],"obtain":[162],"map.":[166],"Extensive":[167],"comparative":[168],"experiments":[169],"ablation":[171],"validate":[173],"LAVS":[178],"terms":[181],"effectiveness":[183],"complexity.":[185]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":5}],"updated_date":"2026-04-02T15:55:50.835912","created_date":"2025-10-10T00:00:00"}
