{"id":"https://openalex.org/W4415539321","doi":"https://doi.org/10.1145/3746027.3755563","title":"Query-Based Audio-Visual Temporal Forgery Localization with Register-Enhanced Representation Learning","display_name":"Query-Based Audio-Visual Temporal Forgery Localization with Register-Enhanced Representation Learning","publication_year":2025,"publication_date":"2025-10-25","ids":{"openalex":"https://openalex.org/W4415539321","doi":"https://doi.org/10.1145/3746027.3755563"},"language":null,"primary_location":{"id":"doi:10.1145/3746027.3755563","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3755563","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5104280534","display_name":"Xiaodong Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]},{"id":"https://openalex.org/I4210111616","display_name":"Wuhan Business University","ror":"https://ror.org/0282ggx30","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210111616"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaodong Zhu","raw_affiliation_strings":["NERCMS, School of Computer Science, Wuhan University, Wuhan, Hubei, China"],"raw_orcid":"https://orcid.org/0009-0005-3342-4883","affiliations":[{"raw_affiliation_string":"NERCMS, School of Computer Science, Wuhan University, Wuhan, Hubei, China","institution_ids":["https://openalex.org/I4210111616","https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077956330","display_name":"Suting Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]},{"id":"https://openalex.org/I4210111616","display_name":"Wuhan Business University","ror":"https://ror.org/0282ggx30","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210111616"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Suting Wang","raw_affiliation_strings":["NERCMS, School of Computer Science, Wuhan University, Wuhan, Hubei, China"],"raw_orcid":"https://orcid.org/0009-0009-9156-5456","affiliations":[{"raw_affiliation_string":"NERCMS, School of Computer Science, Wuhan University, Wuhan, Hubei, China","institution_ids":["https://openalex.org/I4210111616","https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Junqi Yang","orcid":"https://orcid.org/0009-0007-6488-2961"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]},{"id":"https://openalex.org/I4210111616","display_name":"Wuhan Business University","ror":"https://ror.org/0282ggx30","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210111616"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junqi Yang","raw_affiliation_strings":["NERCMS, School of Computer Science, Wuhan University, Wuhan, Hubei, China"],"raw_orcid":"https://orcid.org/0009-0007-6488-2961","affiliations":[{"raw_affiliation_string":"NERCMS, School of Computer Science, Wuhan University, Wuhan, Hubei, China","institution_ids":["https://openalex.org/I4210111616","https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102859655","display_name":"Yuhong Yang","orcid":"https://orcid.org/0000-0003-3001-7957"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]},{"id":"https://openalex.org/I4210111616","display_name":"Wuhan Business University","ror":"https://ror.org/0282ggx30","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210111616"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuhong Yang","raw_affiliation_strings":["NERCMS, School of Computer Science, Wuhan University, Wuhan City, Hubei, China"],"raw_orcid":"https://orcid.org/0000-0003-3001-7957","affiliations":[{"raw_affiliation_string":"NERCMS, School of Computer Science, Wuhan University, Wuhan City, Hubei, China","institution_ids":["https://openalex.org/I4210111616","https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041664453","display_name":"Weiping Tu","orcid":"https://orcid.org/0000-0002-6933-3298"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]},{"id":"https://openalex.org/I4210111616","display_name":"Wuhan Business University","ror":"https://ror.org/0282ggx30","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210111616"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weiping Tu","raw_affiliation_strings":["NERCMS, School of Computer Science, Wuhan University, Wuhan, Hubei, China"],"raw_orcid":"https://orcid.org/0000-0002-6933-3298","affiliations":[{"raw_affiliation_string":"NERCMS, School of Computer Science, Wuhan University, Wuhan, Hubei, China","institution_ids":["https://openalex.org/I4210111616","https://openalex.org/I37461747"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100741750","display_name":"Zhongyuan Wang","orcid":"https://orcid.org/0000-0002-9796-488X"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]},{"id":"https://openalex.org/I4210111616","display_name":"Wuhan Business University","ror":"https://ror.org/0282ggx30","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210111616"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhongyuan Wang","raw_affiliation_strings":["NERCMS, School of Computer Science, Wuhan University, Wuhan, Hubei, China"],"raw_orcid":"https://orcid.org/0000-0002-9796-488X","affiliations":[{"raw_affiliation_string":"NERCMS, School of Computer Science, Wuhan University, Wuhan, Hubei, China","institution_ids":["https://openalex.org/I4210111616","https://openalex.org/I37461747"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.26067984,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"8547","last_page":"8556"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12357","display_name":"Digital Media Forensic Detection","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12357","display_name":"Digital Media Forensic Detection","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9855999946594238,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.5968999862670898},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5630999803543091},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.5479000210762024},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5314000248908997},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5081999897956848},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.4708999991416931},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.4291999936103821}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8299999833106995},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.5968999862670898},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5667999982833862},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5630999803543091},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5479000210762024},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5314000248908997},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5081999897956848},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.4708999991416931},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.4291999936103821},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3774000108242035},{"id":"https://openalex.org/C177284502","wikidata":"https://www.wikidata.org/wiki/Q1005390","display_name":"Adapter (computing)","level":2,"score":0.3596999943256378},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.34950000047683716},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.29580000042915344},{"id":"https://openalex.org/C77277458","wikidata":"https://www.wikidata.org/wiki/Q1969246","display_name":"Temporal database","level":2,"score":0.2720000147819519},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.26989999413490295},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.26330000162124634},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.2599000036716461},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.2533999979496002}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3746027.3755563","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3755563","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6789628829","display_name":null,"funder_award_id":"62171326, 62371350, 62471343","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W2808631503","https://openalex.org/W2890952074","https://openalex.org/W3093077034","https://openalex.org/W3173161217","https://openalex.org/W3204659849","https://openalex.org/W4360993864","https://openalex.org/W4385822353","https://openalex.org/W4386057769","https://openalex.org/W4386267173","https://openalex.org/W4386272941","https://openalex.org/W4391230384","https://openalex.org/W4403791323","https://openalex.org/W4407392445","https://openalex.org/W4413145949"],"related_works":[],"abstract_inverted_index":{"Temporal":[0,22],"forgery":[1],"in":[2],"multimedia-where":[3],"audio":[4],"or":[5],"video":[6],"streams":[7],"are":[8,66],"subtly":[9],"manipulated-poses":[10],"critical":[11],"challenges":[12],"for":[13,51,100,113,153],"content":[14],"authenticity":[15],"verification.":[16],"While":[17],"video-level":[18],"detection":[19,156],"has":[20],"advanced,":[21],"Forgery":[23],"Localization":[24],"(TFL)":[25],"remains":[26],"underexplored,":[27],"often":[28],"limited":[29,78],"by":[30],"weak":[31],"audio-visual":[32,61,105],"modeling":[33],"and":[34,63,117,132,145,157],"reliance":[35],"on":[36,76,130],"non-learnable":[37],"post-processing.":[38,91],"To":[39],"address":[40],"these":[41],"challenges,":[42],"we":[43],"propose":[44],"RegQAV,":[45],"a":[46,77,95,107,118],"Register-enhanced":[47],"Query-based":[48],"Audio-Visual":[49],"framework":[50],"TFL.":[52],"RegQAV":[53,136],"exploits":[54],"pretrained":[55],"foundation":[56],"models":[57],"to":[58,68,73,122],"capture":[59],"fine-grained":[60],"correspondences":[62],"learnable":[64],"registers":[65],"introduced":[67],"mitigate":[69],"the":[70,125],"model's":[71],"tendency":[72],"overly":[74],"focus":[75],"set":[79],"of":[80,104,127],"temporal":[81],"features.":[82],"A":[83],"query-based":[84],"localization":[85],"strategy":[86],"enables":[87],"end-to-end":[88],"optimization":[89],"without":[90],"We":[92],"also":[93],"introduce":[94],"Modality":[96],"Fusion":[97],"Adapter":[98],"(MFA)":[99],"effective":[101],"multi-scale":[102],"integration":[103],"data,":[106],"Deepfake":[108],"Queries":[109],"Generation":[110],"(DQG)":[111],"module":[112],"efficient":[114],"query":[115],"initialization,":[116],"Poisson":[119],"Count-Based":[120],"Approach":[121],"dynamically":[123],"predict":[124],"number":[126],"forgeries.":[128],"Experiments":[129],"LAV-DF":[131],"AV-Deepfake1M":[133],"show":[134],"that":[135],"achieves":[137],"state-of-the-art":[138],"performance":[139],"with":[140],"fewer":[141],"parameters,":[142],"faster":[143],"inference,":[144],"stronger":[146],"generalization.":[147],"This":[148],"work":[149],"offers":[150],"significant":[151],"potential":[152],"real-time":[154],"deepfake":[155],"other":[158],"multimedia":[159],"verification":[160],"applications.":[161],"The":[162],"code":[163],"is":[164],"available":[165],"at":[166],"https://github.com/zxd3099/RegQAV.":[167]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-25T00:00:00"}
