{"id":"https://openalex.org/W4404207656","doi":"https://doi.org/10.1007/s40747-024-01654-2","title":"Audio-visual event localization with dual temporal-aware scene understanding and image-text knowledge bridging","display_name":"Audio-visual event localization with dual temporal-aware scene understanding and image-text knowledge bridging","publication_year":2024,"publication_date":"2024-11-09","ids":{"openalex":"https://openalex.org/W4404207656","doi":"https://doi.org/10.1007/s40747-024-01654-2"},"language":"en","primary_location":{"id":"doi:10.1007/s40747-024-01654-2","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s40747-024-01654-2","pdf_url":"https://link.springer.com/content/pdf/10.1007/s40747-024-01654-2.pdf","source":{"id":"https://openalex.org/S3035462843","display_name":"Complex & Intelligent Systems","issn_l":"2198-6053","issn":["2198-6053","2199-4536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Complex &amp; Intelligent Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://link.springer.com/content/pdf/10.1007/s40747-024-01654-2.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065653766","display_name":"Pufen Zhang","orcid":"https://orcid.org/0000-0002-6694-6275"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Pufen Zhang","raw_affiliation_strings":["National Center for Materials Service Safety, University of Science and Technology Beijing, Xueyuan Road No. 30, Beijing, Haidian District, 100083, China"],"affiliations":[{"raw_affiliation_string":"National Center for Materials Service Safety, University of Science and Technology Beijing, Xueyuan Road No. 30, Beijing, Haidian District, 100083, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100784264","display_name":"Jiaxiang Wang","orcid":"https://orcid.org/0000-0003-3059-798X"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiaxiang Wang","raw_affiliation_strings":["National Center for Materials Service Safety, University of Science and Technology Beijing, Xueyuan Road No. 30, Beijing, Haidian District, 100083, China"],"affiliations":[{"raw_affiliation_string":"National Center for Materials Service Safety, University of Science and Technology Beijing, Xueyuan Road No. 30, Beijing, Haidian District, 100083, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Meng Wan","orcid":null},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210108629","display_name":"Computer Network Information Center","ror":"https://ror.org/01s0wyf50","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210108629"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Meng Wan","raw_affiliation_strings":["Computer Network Information Center, Chinese Academy of Sciences, Dongsheng South Road No. 2, Beijing, Haidian District, 100080, China"],"affiliations":[{"raw_affiliation_string":"Computer Network Information Center, Chinese Academy of Sciences, Dongsheng South Road No. 2, Beijing, Haidian District, 100080, China","institution_ids":["https://openalex.org/I4210108629","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100366521","display_name":"Song Zhang","orcid":"https://orcid.org/0000-0002-0245-9355"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Song Zhang","raw_affiliation_strings":["National Center for Materials Service Safety, University of Science and Technology Beijing, Xueyuan Road No. 30, Beijing, Haidian District, 100083, China"],"affiliations":[{"raw_affiliation_string":"National Center for Materials Service Safety, University of Science and Technology Beijing, Xueyuan Road No. 30, Beijing, Haidian District, 100083, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101433239","display_name":"Jing Jie","orcid":"https://orcid.org/0000-0001-8875-295X"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jie Jing","raw_affiliation_strings":["National Center for Materials Service Safety, University of Science and Technology Beijing, Xueyuan Road No. 30, Beijing, Haidian District, 100083, China"],"affiliations":[{"raw_affiliation_string":"National Center for Materials Service Safety, University of Science and Technology Beijing, Xueyuan Road No. 30, Beijing, Haidian District, 100083, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015519978","display_name":"Lianhong Ding","orcid":"https://orcid.org/0000-0002-0843-2065"},"institutions":[{"id":"https://openalex.org/I176432857","display_name":"Beijing Wuzi University","ror":"https://ror.org/00bd1d647","country_code":"CN","type":"education","lineage":["https://openalex.org/I176432857"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lianhong Ding","raw_affiliation_strings":["Beijing Wuzi University, Fuhe Road No. 1, Beijing, Tongzhou District, 100006, China"],"affiliations":[{"raw_affiliation_string":"Beijing Wuzi University, Fuhe Road No. 1, Beijing, Tongzhou District, 100006, China","institution_ids":["https://openalex.org/I176432857"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101483396","display_name":"Peng Shi","orcid":"https://orcid.org/0000-0002-5349-6383"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peng Shi","raw_affiliation_strings":["National Center for Materials Service Safety, University of Science and Technology Beijing, Xueyuan Road No. 30, Beijing, Haidian District, 100083, China"],"affiliations":[{"raw_affiliation_string":"National Center for Materials Service Safety, University of Science and Technology Beijing, Xueyuan Road No. 30, Beijing, Haidian District, 100083, China","institution_ids":["https://openalex.org/I92403157"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5065653766"],"corresponding_institution_ids":["https://openalex.org/I92403157"],"apc_list":{"value":1320,"currency":"GBP","value_usd":1619},"apc_paid":{"value":1320,"currency":"GBP","value_usd":1619},"fwci":0.6725,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.69419868,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":"11","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9922999739646912,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.8592852354049683},{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.6989039182662964},{"id":"https://openalex.org/keywords/dual","display_name":"Dual (grammatical number)","score":0.6426423192024231},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6425492167472839},{"id":"https://openalex.org/keywords/computational-intelligence","display_name":"Computational intelligence","score":0.5825631618499756},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.541219174861908},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5018613338470459},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4628962576389313},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4356365501880646},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.25598376989364624},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.09214848279953003},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.0750277042388916}],"concepts":[{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.8592852354049683},{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.6989039182662964},{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.6426423192024231},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6425492167472839},{"id":"https://openalex.org/C139502532","wikidata":"https://www.wikidata.org/wiki/Q1122090","display_name":"Computational intelligence","level":2,"score":0.5825631618499756},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.541219174861908},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5018613338470459},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4628962576389313},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4356365501880646},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.25598376989364624},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.09214848279953003},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0750277042388916},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1007/s40747-024-01654-2","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s40747-024-01654-2","pdf_url":"https://link.springer.com/content/pdf/10.1007/s40747-024-01654-2.pdf","source":{"id":"https://openalex.org/S3035462843","display_name":"Complex & Intelligent Systems","issn_l":"2198-6053","issn":["2198-6053","2199-4536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Complex &amp; Intelligent Systems","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:42edcdd0ac674b15bddd72bde5ed2c98","is_oa":true,"landing_page_url":"https://doaj.org/article/42edcdd0ac674b15bddd72bde5ed2c98","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Complex & Intelligent Systems, Vol 11, Iss 1, Pp 1-20 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1007/s40747-024-01654-2","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s40747-024-01654-2","pdf_url":"https://link.springer.com/content/pdf/10.1007/s40747-024-01654-2.pdf","source":{"id":"https://openalex.org/S3035462843","display_name":"Complex & Intelligent Systems","issn_l":"2198-6053","issn":["2198-6053","2199-4536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Complex &amp; Intelligent Systems","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.5799999833106995,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G5597463280","display_name":null,"funder_award_id":"No. 2023YFC3805703","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4404207656.pdf","grobid_xml":"https://content.openalex.org/works/W4404207656.grobid-xml"},"referenced_works_count":49,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W2131774270","https://openalex.org/W2194775991","https://openalex.org/W2526050071","https://openalex.org/W2593116425","https://openalex.org/W2931433835","https://openalex.org/W2932399282","https://openalex.org/W2963748441","https://openalex.org/W2964109005","https://openalex.org/W2990113535","https://openalex.org/W3015371781","https://openalex.org/W3015925607","https://openalex.org/W3021321555","https://openalex.org/W3093287838","https://openalex.org/W3094550259","https://openalex.org/W3118366136","https://openalex.org/W3138516171","https://openalex.org/W3170717896","https://openalex.org/W3170936177","https://openalex.org/W3172522282","https://openalex.org/W3175514052","https://openalex.org/W3176445421","https://openalex.org/W3196974791","https://openalex.org/W3198377975","https://openalex.org/W3211950664","https://openalex.org/W3214311327","https://openalex.org/W4200186624","https://openalex.org/W4211154280","https://openalex.org/W4225327041","https://openalex.org/W4226442948","https://openalex.org/W4284898017","https://openalex.org/W4285606530","https://openalex.org/W4292474994","https://openalex.org/W4304014690","https://openalex.org/W4309660795","https://openalex.org/W4312310776","https://openalex.org/W4312380001","https://openalex.org/W4312383651","https://openalex.org/W4312415534","https://openalex.org/W4312446817","https://openalex.org/W4312658754","https://openalex.org/W4312825288","https://openalex.org/W4313123347","https://openalex.org/W4319300466","https://openalex.org/W4381803424","https://openalex.org/W4386113246","https://openalex.org/W4387682108","https://openalex.org/W4388192054","https://openalex.org/W4392293656"],"related_works":["https://openalex.org/W4388870064","https://openalex.org/W2210139803","https://openalex.org/W4235186151","https://openalex.org/W2054685365","https://openalex.org/W2056057048","https://openalex.org/W2667588871","https://openalex.org/W2272354214","https://openalex.org/W2084768720","https://openalex.org/W2271369634","https://openalex.org/W2043010663"],"abstract_inverted_index":{"Audio-visual":[0],"event":[1,171,222],"localization":[2],"(AVEL)":[3],"task":[4,91],"aims":[5],"to":[6,18,103,181,192],"judge":[7],"and":[8,12,31,56,62,92,128,143,153,187,204,216],"classify":[9],"an":[10,46],"audible":[11],"visible":[13],"event.":[14],"Existing":[15],"methods":[16],"devote":[17],"this":[19,137],"goal":[20],"by":[21],"transferring":[22,67],"pre-trained":[23,69],"knowledge":[24,70,87,95,108,175,189,213,220],"as":[25,27],"well":[26],"understanding":[28,127,147,169],"temporal":[29,54,151],"dependencies":[30,152],"cross-modal":[32,57,154],"correlations":[33,155],"of":[34,53,88,96,109,141,190,214],"the":[35,42,51,68,89,97,105,184,198,210,217,226,242],"audio-visual":[36,43,86,201,218],"scene.":[37],"However,":[38],"most":[39],"works":[40],"comprehend":[41],"scene":[44,126,146,168],"from":[45,71,159],"entangled":[47],"temporal-aware":[48,64,145,161],"perspective,":[49],"ignoring":[50],"learning":[52],"dependency":[55],"correlation":[58],"in":[59,136,149],"both":[60],"forward":[61,142],"backward":[63,144],"views.":[65],"Recently,":[66],"Contrastive":[72],"Language-Image":[73],"Pre-training":[74],"model":[75,133],"(CLIP)":[76],"has":[77,114],"shown":[78],"remarkable":[79],"results":[80,236],"across":[81],"various":[82],"tasks.":[83],"Nevertheless,":[84],"since":[85],"AVEL":[90,112,193],"image-text":[93,106,129,185,211],"alignment":[94,107,188,212],"CLIP":[98,110,191,215],"exist":[99],"heterogeneous":[100],"gap,":[101],"how":[102],"transfer":[104,183],"into":[111],"field":[113],"barely":[115],"been":[116],"investigated.":[117],"To":[118],"address":[119],"these":[120],"challenges,":[121],"a":[122,174],"novel":[123],"Dual":[124],"Temporal-aware":[125],"Knowledge":[130],"Bridging":[131],"(DTKB)":[132],"is":[134,179,229],"proposed":[135,180],"paper.":[138],"DTKB":[139,164,239],"consists":[140],"streams,":[148],"which":[150],"are":[156],"explicitly":[157],"captured":[158],"dual":[160],"perspectives.":[162],"Consequently,":[163],"can":[165],"achieve":[166],"fine-grained":[167],"for":[170,221],"localization.":[172],"Additionally,":[173],"bridging":[176,209],"(KB)":[177],"module":[178,196,228],"simultaneously":[182],"representation":[186],"task.":[194],"This":[195],"regulates":[197],"ratio":[199],"between":[200],"fusion":[202],"features":[203],"CLIP\u2019s":[205],"visual":[206],"features,":[207],"thereby":[208],"new":[219],"category":[223],"prediction.":[224],"Besides,":[225],"KB":[227],"compatible":[230],"with":[231],"previous":[232],"models.":[233,244],"Extensive":[234],"experimental":[235],"demonstrate":[237],"that":[238],"significantly":[240],"outperforms":[241],"state-of-the-arts":[243]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
