{"id":"https://openalex.org/W7159625964","doi":"https://doi.org/10.48550/arxiv.2604.28022","title":"Are DeepFakes Realistic Enough? Exploring Semantic Mismatch as a Novel Challenge","display_name":"Are DeepFakes Realistic Enough? Exploring Semantic Mismatch as a Novel Challenge","publication_year":2026,"publication_date":"2026-04-30","ids":{"openalex":"https://openalex.org/W7159625964","doi":"https://doi.org/10.48550/arxiv.2604.28022"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.28022","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.28022","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.28022","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134930670","display_name":"Sharayu Nilesh Deshmukh","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Deshmukh, Sharayu Nilesh","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054375863","display_name":"Kailash Hambarde","orcid":"https://orcid.org/0000-0003-1012-2952"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hambarde, Kailash A.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075085972","display_name":"Joana C. Costa","orcid":"https://orcid.org/0000-0001-7046-2284"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Costa, Joana C.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134967929","display_name":"Hugo Proen\u00e7a","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Proen\u00e7a, Hugo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5088262626","display_name":"Tiago Roxo","orcid":"https://orcid.org/0000-0001-9563-8039"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Roxo, Tiago","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.22460000216960907,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.22460000216960907,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.2012999951839447,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12357","display_name":"Digital Media Forensic Detection","score":0.19290000200271606,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6100999712944031},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.5102999806404114},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.5024999976158142},{"id":"https://openalex.org/keywords/divergence","display_name":"Divergence (linguistics)","score":0.46959999203681946},{"id":"https://openalex.org/keywords/semantic-data-model","display_name":"Semantic data model","score":0.4302000105381012},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.4059000015258789},{"id":"https://openalex.org/keywords/binary-number","display_name":"Binary number","score":0.3799999952316284}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8295999765396118},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6100999712944031},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.5102999806404114},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.5024999976158142},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47679999470710754},{"id":"https://openalex.org/C207390915","wikidata":"https://www.wikidata.org/wiki/Q1230525","display_name":"Divergence (linguistics)","level":2,"score":0.46959999203681946},{"id":"https://openalex.org/C90312973","wikidata":"https://www.wikidata.org/wiki/Q7449052","display_name":"Semantic data model","level":2,"score":0.4302000105381012},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.4059000015258789},{"id":"https://openalex.org/C48372109","wikidata":"https://www.wikidata.org/wiki/Q3913","display_name":"Binary number","level":2,"score":0.3799999952316284},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.3578999936580658},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3546000123023987},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.334199994802475},{"id":"https://openalex.org/C198942812","wikidata":"https://www.wikidata.org/wiki/Q496618","display_name":"Semantic property","level":2,"score":0.3327000141143799},{"id":"https://openalex.org/C2983685735","wikidata":"https://www.wikidata.org/wiki/Q5227355","display_name":"Data source","level":2,"score":0.3109999895095825},{"id":"https://openalex.org/C2778180026","wikidata":"https://www.wikidata.org/wiki/Q18378163","display_name":"Semantic heterogeneity","level":4,"score":0.30959999561309814},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.29510000348091125},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.2702000141143799},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.2685999870300293},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2599000036716461}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.28022","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.28022","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.28022","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.28022","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10","score":0.7416087985038757}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Current":[0],"DeepFake":[1,57,120,171,190],"detection":[2,172],"scenarios":[3],"are":[4,197],"mostly":[5],"binary,":[6],"yet":[7],"data":[8,44,63,196],"manipulation":[9,32],"can":[10,69],"vary":[11],"across":[12],"audio,":[13],"video,":[14],"or":[15],"both,":[16],"whose":[17],"variability":[18],"is":[19,59],"not":[20,60],"captured":[21],"in":[22,61,66,116,173],"binary":[23],"settings.":[24],"Four-class":[25],"audio-visual":[26,150],"formulations":[27],"address":[28],"this":[29,117],"by":[30,74,88],"discriminating":[31],"type,":[33],"but":[34,65],"introduce":[35,140],"a":[36,80,99,156],"unresolved":[37],"problem:":[38],"models":[39,115],"may":[40],"rely":[41],"solely":[42],"on":[43,180],"source":[45,64,193],"integrity":[46],"to":[47,169,187],"detect":[48],"DeepFakes":[49],"without":[50],"evaluating":[51],"their":[52],"semantic":[53,70,135,157,163],"consistency.":[54],"If":[55],"the":[56,62,75,85,97,111,123,127,162,185],"origin":[58],"its":[67],"content,":[68],"mismatch":[71,136,164],"be":[72],"assessed":[73],"state-of-the-art?":[76],"This":[77],"paper":[78],"proposes":[79],"new":[81,100,118],"evaluation":[82],"setup,":[83],"extending":[84],"four-class":[86],"formulation":[87],"explicitly":[89],"modeling":[90],"semantic-level":[91],"inconsistency":[92],"between":[93],"authentic":[94],"modalities":[95],"with":[96,105,134],"introduction":[98],"class:":[101],"Real":[102],"Audio-Real":[103],"Video":[104],"Semantic":[106],"Mismatch":[107],"(RARV-SMM).":[108],"We":[109,138,153],"assess":[110],"robustness":[112],"of":[113,129],"state-of-the-art":[114,178],"realistic":[119,189],"setting,":[121],"using":[122],"FakeAVCeleb":[124,181],"dataset,":[125],"highlighting":[126],"limitations":[128],"existing":[130],"approaches":[131],"when":[132],"faced":[133],"data.":[137],"further":[139],"three":[141],"RARV-SMM":[142],"variants":[143],"that":[144,160],"expose":[145],"distinct":[146],"architectural":[147],"vulnerabilities":[148],"as":[149],"divergence":[151],"increases.":[152],"also":[154],"propose":[155],"reinforcement":[158],"strategy":[159],"incorporates":[161],"class":[165],"and":[166,177,182,195],"ImageBind":[167],"embeddings":[168],"improve":[170],"both":[174],"our":[175],"proposed":[176],"settings,":[179],"LAV-DF,":[183],"paving":[184],"way":[186],"more":[188],"detectors.":[191],"The":[192],"code":[194],"available":[198],"at":[199],"https://github.com/.":[200]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-02T00:00:00"}
