{"id":"https://openalex.org/W7138018304","doi":"https://doi.org/10.1609/aaai.v40i13.38108","title":"Identity-Aware Vision-Language Model for Explainable Face Forgery Detection","display_name":"Identity-Aware Vision-Language Model for Explainable Face Forgery Detection","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7138018304","doi":"https://doi.org/10.1609/aaai.v40i13.38108"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i13.38108","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i13.38108","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v40i13.38108","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129704512","display_name":"Junhao Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Junhao Xu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129704666","display_name":"Jingjing Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jingjing Chen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129673507","display_name":"Yang Jiao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang Jiao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129750490","display_name":"Jiacheng Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiacheng Zhang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111532053","display_name":"Zhiyu Tan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhiyu Tan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129730639","display_name":"Hao Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hao Li","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129669423","display_name":"Yu-Gang Jiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu-Gang Jiang","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5129704512"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.30126771,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"13","first_page":"11278","last_page":"11286"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12357","display_name":"Digital Media Forensic Detection","score":0.35510000586509705,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12357","display_name":"Digital Media Forensic Detection","score":0.35510000586509705,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.35030001401901245,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.17329999804496765,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/identity","display_name":"Identity (music)","score":0.49140000343322754},{"id":"https://openalex.org/keywords/identifier","display_name":"Identifier","score":0.45910000801086426},{"id":"https://openalex.org/keywords/face","display_name":"Face (sociological concept)","score":0.4555000066757202},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.4381999969482422},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.42890000343322754},{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.4284000098705292},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.385699987411499},{"id":"https://openalex.org/keywords/facial-recognition-system","display_name":"Facial recognition system","score":0.36390000581741333},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.34130001068115234}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8151999711990356},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6603000164031982},{"id":"https://openalex.org/C2778355321","wikidata":"https://www.wikidata.org/wiki/Q17079427","display_name":"Identity (music)","level":2,"score":0.49140000343322754},{"id":"https://openalex.org/C154504017","wikidata":"https://www.wikidata.org/wiki/Q853614","display_name":"Identifier","level":2,"score":0.45910000801086426},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.4555000066757202},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.4381999969482422},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4350999891757965},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.42890000343322754},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.4284000098705292},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.385699987411499},{"id":"https://openalex.org/C31510193","wikidata":"https://www.wikidata.org/wiki/Q1192553","display_name":"Facial recognition system","level":3,"score":0.36390000581741333},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3488999903202057},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.34130001068115234},{"id":"https://openalex.org/C2779010991","wikidata":"https://www.wikidata.org/wiki/Q2720909","display_name":"Artifact (error)","level":2,"score":0.3285999894142151},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.32519999146461487},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.32429999113082886},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.30630001425743103},{"id":"https://openalex.org/C4641261","wikidata":"https://www.wikidata.org/wiki/Q11681085","display_name":"Face detection","level":4,"score":0.305400013923645},{"id":"https://openalex.org/C86034646","wikidata":"https://www.wikidata.org/wiki/Q474311","display_name":"Semantic gap","level":4,"score":0.2897999882698059},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.2791999876499176},{"id":"https://openalex.org/C184297639","wikidata":"https://www.wikidata.org/wiki/Q177765","display_name":"Biometrics","level":2,"score":0.26339998841285706},{"id":"https://openalex.org/C75291252","wikidata":"https://www.wikidata.org/wiki/Q1315756","display_name":"TRACE (psycholinguistics)","level":2,"score":0.2632000148296356},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2630000114440918},{"id":"https://openalex.org/C2777402240","wikidata":"https://www.wikidata.org/wiki/Q6783436","display_name":"Masking (illustration)","level":2,"score":0.2567000091075897},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2547999918460846},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.25440001487731934},{"id":"https://openalex.org/C2779356469","wikidata":"https://www.wikidata.org/wiki/Q502918","display_name":"Counterfeit","level":2,"score":0.25440001487731934}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i13.38108","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i13.38108","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i13.38108","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i13.38108","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advances":[1],"in":[2,36,60],"generative":[3],"artificial":[4],"intelligence":[5],"have":[6],"enabled":[7],"the":[8,49,146,176],"creation":[9],"of":[10,175],"highly":[11],"realistic":[12],"image":[13],"forgeries,":[14],"raising":[15],"significant":[16],"concerns":[17],"about":[18],"digital":[19],"media":[20],"authenticity.":[21],"While":[22],"existing":[23,40],"detection":[24,161,166],"methods":[25,65],"demonstrate":[26,185],"promising":[27],"results":[28],"on":[29,68],"benchmark":[30],"datasets,":[31],"they":[32],"face":[33],"critical":[34,180],"limitations":[35],"real-world":[37],"applications.":[38],"First,":[39],"detectors":[41,200],"typically":[42],"fail":[43],"to":[44,123,148],"detect":[45],"semantic":[46,107],"inconsistencies":[47],"with":[48],"person\u2019s":[50],"identity,":[51],"such":[52],"as":[53],"implausible":[54],"behaviors":[55],"or":[56,83],"incompatible":[57],"environmental":[58],"contexts":[59],"given":[61],"images.":[62],"Second,":[63],"these":[64,89],"rely":[66],"heavily":[67],"low-level":[69,101,181],"visual":[70,102],"cues,":[71],"making":[72],"them":[73],"effective":[74],"for":[75],"known":[76],"forgeries":[77],"but":[78],"less":[79],"reliable":[80],"against":[81],"new":[82],"unseen":[84],"manipulation":[85],"techniques.":[86],"To":[87],"address":[88],"challenges,":[90],"we":[91,129],"present":[92],"a":[93,131,164],"novel":[94],"personalized":[95],"vision-language":[96],"model":[97,147],"(VLM)":[98],"that":[99,120,134,168,186],"integrates":[100],"artifact":[103],"analysis":[104],"and":[105,192,201],"high-level":[106],"inconsistency":[108],"detection.":[109],"Unlike":[110],"previous":[111],"VLM-based":[112],"methods,":[113],"our":[114,187],"approach":[115,188],"avoids":[116],"resource-intensive":[117],"supervised":[118],"fine-tuning":[119],"often":[121],"struggles":[122],"preserve":[124],"distinct":[125,150],"identity":[126,151],"characteristics.":[127],"Instead,":[128],"employ":[130],"lightweight":[132,165],"method":[133],"dynamically":[135],"encodes":[136],"identity-specific":[137],"information":[138,171],"into":[139],"specialized":[140],"identifier":[141],"tokens.":[142,209],"This":[143],"design":[144],"enables":[145],"learn":[149],"characteristics":[152],"while":[153,204],"maintaining":[154],"robust":[155],"generalization":[156],"capabilities.":[157],"We":[158],"further":[159],"enhance":[160],"capabilities":[162],"through":[163],"adapter":[167],"extracts":[169],"fine-grained":[170],"from":[172],"shallow":[173],"features":[174],"vision":[177],"encoder,":[178],"preserving":[179],"evidence.":[182],"Comprehensive":[183],"experiments":[184],"achieves":[189],"94.25%":[190],"accuracy":[191],"94.08%":[193],"F1":[194],"score,":[195],"outperforming":[196],"both":[197],"traditional":[198],"forgery":[199],"general":[202],"VLMs":[203],"requiring":[205],"only":[206],"10":[207],"extra":[208]},"counts_by_year":[],"updated_date":"2026-03-18T06:31:55.123368","created_date":"2026-03-18T00:00:00"}
