{"id":"https://openalex.org/W7083677910","doi":"https://doi.org/10.48550/arxiv.2509.21989","title":"Mind-the-Glitch: Visual Correspondence for Detecting Inconsistencies in Subject-Driven Generation","display_name":"Mind-the-Glitch: Visual Correspondence for Detecting Inconsistencies in Subject-Driven Generation","publication_year":2025,"publication_date":"2025-09-26","ids":{"openalex":"https://openalex.org/W7083677910","doi":"https://doi.org/10.48550/arxiv.2509.21989"},"language":"en","primary_location":{"id":"doi:10.48550/arxiv.2509.21989","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2509.21989","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2509.21989","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Eldesokey, Abdelrahman","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Eldesokey, Abdelrahman","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Cvejic, Aleksandar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cvejic, Aleksandar","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Ghanem, Bernard","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ghanem, Bernard","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Wonka, Peter","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wonka, Peter","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T11536","display_name":"Consumer Retail Behavior Studies","score":0.03819999843835831,"subfield":{"id":"https://openalex.org/subfields/1406","display_name":"Marketing"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11536","display_name":"Consumer Retail Behavior Studies","score":0.03819999843835831,"subfield":{"id":"https://openalex.org/subfields/1406","display_name":"Marketing"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13847","display_name":"Literature Analysis and Criticism","score":0.027499999850988388,"subfield":{"id":"https://openalex.org/subfields/2735","display_name":"Pediatrics, Perinatology and Child Health"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T12392","display_name":"Sharing Economy and Platforms","score":0.02250000089406967,"subfield":{"id":"https://openalex.org/subfields/1406","display_name":"Marketing"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.5910999774932861},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5479000210762024},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.5328999757766724},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5121999979019165},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.44200000166893005},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.42559999227523804},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4147999882698059}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8241999745368958},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6628000140190125},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.5910999774932861},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5479000210762024},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.5328999757766724},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.5121999979019165},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.44200000166893005},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.42559999227523804},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4147999882698059},{"id":"https://openalex.org/C2781122975","wikidata":"https://www.wikidata.org/wiki/Q16928266","display_name":"Semantic feature","level":2,"score":0.39309999346733093},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.3792000114917755},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.34630000591278076},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3310000002384186},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.32420000433921814},{"id":"https://openalex.org/C160086991","wikidata":"https://www.wikidata.org/wiki/Q5939193","display_name":"Human visual system model","level":3,"score":0.27129998803138733},{"id":"https://openalex.org/C2983787585","wikidata":"https://www.wikidata.org/wiki/Q93586","display_name":"Feature matching","level":3,"score":0.2685000002384186},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.25519999861717224}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2509.21989","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2509.21989","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2509.21989","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2509.21989","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.6428173184394836}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0],"propose":[1,108],"a":[2,22,94,109,173],"novel":[3],"approach":[4,129],"for":[5,176],"disentangling":[6],"visual":[7,19,45,56,83,118,143],"and":[8,82,92,138,165],"semantic":[9,28,81],"features":[10,46,57],"from":[11],"the":[12,26,62,99,104,158],"backbones":[13,33],"of":[14,64,150,167],"pre-trained":[15],"diffusion":[16,31],"models,":[17],"enabling":[18,147],"correspondence":[20],"in":[21,120,141,169],"manner":[23],"analogous":[24],"to":[25,36,47,61,97],"well-established":[27],"correspondence.":[29],"While":[30],"model":[32],"are":[34],"known":[35],"encode":[37],"semantically":[38],"rich":[39],"features,":[40],"they":[41],"must":[42],"also":[43,146],"contain":[44],"support":[48],"their":[49],"image":[50,77,89,122],"synthesis":[51],"capabilities.":[52],"However,":[53],"isolating":[54],"these":[55],"is":[58,157],"challenging":[59],"due":[60],"absence":[63],"annotated":[65,80],"datasets.":[66],"To":[67,153],"address":[68],"this,":[69],"we":[70,107],"introduce":[71],"an":[72],"automated":[73],"pipeline":[74],"that":[75,116,127,161],"constructs":[76],"pairs":[78],"with":[79],"correspondences":[84],"based":[85],"on":[86],"existing":[87],"subject-driven":[88,121,170],"generation":[90],"datasets,":[91],"design":[93],"contrastive":[95],"architecture":[96],"separate":[98],"two":[100],"feature":[101],"types.":[102],"Leveraging":[103],"disentangled":[105],"representations,":[106],"new":[110],"metric,":[111],"Visual":[112],"Semantic":[113],"Matching":[114],"(VSM),":[115],"quantifies":[117],"inconsistencies":[119,144,168],"generation.":[123],"Empirical":[124],"results":[125],"show":[126],"our":[128,154],"outperforms":[130],"global":[131],"feature-based":[132],"metrics":[133],"such":[134],"as":[135],"CLIP,":[136],"DINO,":[137],"vision--language":[139],"models":[140],"quantifying":[142],"while":[145],"spatial":[148],"localization":[149,166],"inconsistent":[151],"regions.":[152],"knowledge,":[155],"this":[156,178],"first":[159],"method":[160],"supports":[162],"both":[163],"quantification":[164],"generation,":[171],"offering":[172],"valuable":[174],"tool":[175],"advancing":[177],"task.":[179],"Project":[180],"Page:https://abdo-eldesokey.github.io/mind-the-glitch/":[181]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
