{"id":"https://openalex.org/W4415540935","doi":"https://doi.org/10.1145/3746027.3761980","title":"HOLA: Enhancing Audio-visual Deepfake Detection via Hierarchical Contextual Aggregations and Efficient Pre-training","display_name":"HOLA: Enhancing Audio-visual Deepfake Detection via Hierarchical Contextual Aggregations and Efficient Pre-training","publication_year":2025,"publication_date":"2025-10-25","ids":{"openalex":"https://openalex.org/W4415540935","doi":"https://doi.org/10.1145/3746027.3761980"},"language":null,"primary_location":{"id":"doi:10.1145/3746027.3761980","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3761980","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5108497986","display_name":"Xuecheng Wu","orcid":"https://orcid.org/0000-0002-6244-0269"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xuecheng Wu","raw_affiliation_strings":["Xi'an Jiaotong University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong University, Xi'an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100705864","display_name":"Heli Sun","orcid":"https://orcid.org/0000-0003-0818-0301"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Heli Sun","raw_affiliation_strings":["Xi'an Jiaotong University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong University, Xi'an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043360673","display_name":"Danlei Huang","orcid":null},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Danlei Huang","raw_affiliation_strings":["Xi'an Jiaotong University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong University, Xi'an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087641872","display_name":"Xinyi Yin","orcid":null},"institutions":[{"id":"https://openalex.org/I38877650","display_name":"Zhengzhou University","ror":"https://ror.org/04ypx8c21","country_code":"CN","type":"education","lineage":["https://openalex.org/I38877650"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinyi Yin","raw_affiliation_strings":["Zhengzhou University, Zhengzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhengzhou University, Zhengzhou, China","institution_ids":["https://openalex.org/I38877650"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115602498","display_name":"Yifan Wang","orcid":"https://orcid.org/0009-0002-8153-492X"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yifan Wang","raw_affiliation_strings":["University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101854659","display_name":"Hao Wang","orcid":"https://orcid.org/0000-0001-6959-7237"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hao Wang","raw_affiliation_strings":["Xi'an Jiaotong University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong University, Xi'an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101243907","display_name":"Jia Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jia Zhang","raw_affiliation_strings":["Xi'an Jiaotong University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong University, Xi'an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018130513","display_name":"Fei Wang","orcid":"https://orcid.org/0000-0003-3920-0264"},"institutions":[{"id":"https://openalex.org/I27357992","display_name":"Dalian University of Technology","ror":"https://ror.org/023hj5876","country_code":"CN","type":"education","lineage":["https://openalex.org/I27357992"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fei Wang","raw_affiliation_strings":["Dalian University of Technology, Dalian, China"],"affiliations":[{"raw_affiliation_string":"Dalian University of Technology, Dalian, China","institution_ids":["https://openalex.org/I27357992"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113911018","display_name":"Pan Guo","orcid":null},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peihao Guo","raw_affiliation_strings":["Xi'an Jiaotong University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong University, Xi'an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110964154","display_name":"Suchuan Xing","orcid":null},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Suyu Xing","raw_affiliation_strings":["Xi'an Jiaotong University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong University, Xi'an, China","institution_ids":["https://openalex.org/I87445476"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051556369","display_name":"Junxiao Xue","orcid":"https://orcid.org/0000-0003-1569-5362"},"institutions":[{"id":"https://openalex.org/I4210123185","display_name":"Zhejiang Lab","ror":"https://ror.org/02m2h7991","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210123185"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junxiao Xue","raw_affiliation_strings":["Zhejiang Lab, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang Lab, Hangzhou, China","institution_ids":["https://openalex.org/I4210123185"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101469086","display_name":"Liang He","orcid":"https://orcid.org/0000-0002-6463-5158"},"institutions":[{"id":"https://openalex.org/I87445476","display_name":"Xi'an Jiaotong University","ror":"https://ror.org/017zhmm22","country_code":"CN","type":"education","lineage":["https://openalex.org/I87445476"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liang He","raw_affiliation_strings":["Xi'an Jiaotong University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"Xi'an Jiaotong University, Xi'an, China","institution_ids":["https://openalex.org/I87445476"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":12,"corresponding_author_ids":["https://openalex.org/A5108497986"],"corresponding_institution_ids":["https://openalex.org/I87445476"],"apc_list":null,"apc_paid":null,"fwci":1.4872,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.86705262,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"13692","last_page":"13699"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10688","display_name":"Image and Signal Denoising Methods","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.5580000281333923},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.37959998846054077},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.3467999994754791},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.337799996137619},{"id":"https://openalex.org/keywords/supervised-learning","display_name":"Supervised learning","score":0.32899999618530273}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7746999859809875},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6299999952316284},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5753999948501587},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.5580000281333923},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.37959998846054077},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.3467999994754791},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.34220001101493835},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.337799996137619},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.32899999618530273},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.32280001044273376},{"id":"https://openalex.org/C148043351","wikidata":"https://www.wikidata.org/wiki/Q4456944","display_name":"Current (fluid)","level":2,"score":0.3059000074863434}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3746027.3761980","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3761980","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W1522734439","https://openalex.org/W2752782242","https://openalex.org/W2963524571","https://openalex.org/W2982058372","https://openalex.org/W3015598461","https://openalex.org/W3034713808","https://openalex.org/W3035574324","https://openalex.org/W3094550259","https://openalex.org/W3173797312","https://openalex.org/W3174508664","https://openalex.org/W3178572954","https://openalex.org/W3186489652","https://openalex.org/W3197199219","https://openalex.org/W4220658233","https://openalex.org/W4293055826","https://openalex.org/W4312560592","https://openalex.org/W4312722235","https://openalex.org/W4319978495","https://openalex.org/W4361856786","https://openalex.org/W4383337695","https://openalex.org/W4385801058","https://openalex.org/W4387540674","https://openalex.org/W4393181080","https://openalex.org/W4401753888","https://openalex.org/W4401798331","https://openalex.org/W4402774452","https://openalex.org/W4403791323","https://openalex.org/W6907407296"],"related_works":[],"abstract_inverted_index":{"Advances":[0],"in":[1,44,54],"Generative":[2],"AI":[3],"have":[4],"made":[5],"video-level":[6,57],"deepfake":[7,58],"detection":[8,16],"increasingly":[9],"challenging,":[10],"exposing":[11],"the":[12,27,39,45,55,96,111,131,147,161,167],"limitations":[13],"of":[14,32,41,65,133,142,151],"current":[15],"techniques.":[17],"In":[18],"this":[19],"paper,":[20],"we":[21,48,109],"present":[22],"HOLA,":[23],"our":[24,62,134,152,156],"solution":[25],"to":[26,70,117,145],"Video-Level":[28],"Deepfake":[29],"Detection":[30,35],"track":[31],"2025":[33],"1M-Deepfakes":[34],"Challenge.":[36],"Inspired":[37],"by":[38,163],"success":[40],"large-scale":[42],"pre-training":[43,53],"general":[46],"domain,":[47],"first":[49],"scale":[50],"audio-visual":[51,87],"self-supervised":[52],"multimodal":[56],"detection,":[59],"which":[60],"leverages":[61],"self-built":[63],"dataset":[64],"1.81M":[66],"samples,":[67],"thereby":[68],"leading":[69],"a":[71,100,140],"unified":[72],"two-stage":[73],"framework.":[74],"To":[75],"be":[76],"specific,":[77],"HOLA":[78,157],"features":[79],"an":[80],"iterative-aware":[81],"cross-modal":[82],"learning":[83],"module":[84],"for":[85,103],"selective":[86],"interactions,":[88],"hierarchical":[89],"contextual":[90],"modeling":[91],"with":[92],"gated":[93],"aggregations":[94],"under":[95],"local-global":[97],"perspective,":[98],"and":[99,127],"pyramid-like":[101],"refiner":[102],"scale-aware":[104],"cross-grained":[105],"semantic":[106],"enhancements.":[107],"Moreover,":[108],"propose":[110],"pseudo":[112],"supervised":[113],"singal":[114],"injection":[115],"strategy":[116],"further":[118],"boost":[119],"model":[120],"performance.":[121],"Extensive":[122],"experiments":[123],"across":[124],"expert":[125],"models":[126],"MLLMs":[128],"impressivly":[129],"demonstrate":[130],"effectiveness":[132],"proposed":[135],"HOLA.":[136],"We":[137],"also":[138],"conduct":[139],"series":[141],"ablation":[143],"studies":[144],"explore":[146],"crucial":[148],"design":[149],"factors":[150],"introduced":[153],"components.":[154],"Remarkably,":[155],"ranks":[158],"1st,":[159],"outperforming":[160],"second":[162],"0.0476":[164],"AUC":[165],"on":[166],"TestA":[168],"set.":[169]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-25T00:00:00"}
