{"id":"https://openalex.org/W4392539598","doi":"https://doi.org/10.48550/arxiv.2403.02581","title":"VEglue: Testing Visual Entailment Systems via Object-Aligned Joint Erasing","display_name":"VEglue: Testing Visual Entailment Systems via Object-Aligned Joint Erasing","publication_year":2024,"publication_date":"2024-03-05","ids":{"openalex":"https://openalex.org/W4392539598","doi":"https://doi.org/10.48550/arxiv.2403.02581"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2403.02581","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.02581","pdf_url":"https://arxiv.org/pdf/2403.02581","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2403.02581","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065490541","display_name":"Zhiyuan Chang","orcid":"https://orcid.org/0000-0001-7251-501X"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chang, Zhiyuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100443408","display_name":"Mingyang Li","orcid":"https://orcid.org/0000-0002-0401-4923"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Mingyang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108814327","display_name":"Junjie Wang","orcid":"https://orcid.org/0000-0002-9941-6713"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Junjie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100354225","display_name":"Cheng Li","orcid":"https://orcid.org/0000-0001-6110-8099"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Cheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100434918","display_name":"Qing Wang","orcid":"https://orcid.org/0009-0004-1353-7541"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Qing","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5065490541"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9911999702453613,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/joint","display_name":"Joint (building)","score":0.6978291273117065},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6372788548469543},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.6277945637702942},{"id":"https://openalex.org/keywords/textual-entailment","display_name":"Textual entailment","score":0.544842541217804},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.42651286721229553},{"id":"https://openalex.org/keywords/logical-consequence","display_name":"Logical consequence","score":0.42348363995552063},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.39466747641563416},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.37129539251327515},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.10088390111923218},{"id":"https://openalex.org/keywords/structural-engineering","display_name":"Structural engineering","score":0.0720803439617157}],"concepts":[{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.6978291273117065},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6372788548469543},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.6277945637702942},{"id":"https://openalex.org/C95318506","wikidata":"https://www.wikidata.org/wiki/Q6588467","display_name":"Textual entailment","level":3,"score":0.544842541217804},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42651286721229553},{"id":"https://openalex.org/C134752490","wikidata":"https://www.wikidata.org/wiki/Q374182","display_name":"Logical consequence","level":2,"score":0.42348363995552063},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.39466747641563416},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.37129539251327515},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10088390111923218},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0720803439617157}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2403.02581","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.02581","pdf_url":"https://arxiv.org/pdf/2403.02581","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},{"id":"doi:10.48550/arxiv.2403.02581","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2403.02581","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2403.02581","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2403.02581","pdf_url":"https://arxiv.org/pdf/2403.02581","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4392539598.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2169644218","https://openalex.org/W12963412","https://openalex.org/W2250460949","https://openalex.org/W3158371345","https://openalex.org/W3141423438","https://openalex.org/W2071098659","https://openalex.org/W2627035043","https://openalex.org/W4385571113","https://openalex.org/W2937401546","https://openalex.org/W3030695269"],"abstract_inverted_index":{"Visual":[0],"entailment":[1],"(VE)":[2],"is":[3,15,23,30,52,202],"a":[4,13,21,26,62],"multimodal":[5],"reasoning":[6],"task":[7],"consisting":[8],"of":[9,86,89,175],"image-sentence":[10],"pairs":[11],"whereby":[12],"promise":[14],"defined":[16],"by":[17,25,108,112,225,233],"an":[18,129],"image,":[19],"and":[20,147,156,220],"hypothesis":[22,152],"described":[24],"sentence.":[27,39],"The":[28],"goal":[29],"to":[31,83,153,170,235],"predict":[32],"whether":[33],"the":[34,38,53,84,87,98,104,113,118,121,141,145,151,162,173,176,206,223,230,237,255,258],"image":[35,119],"semantically":[36],"entails":[37],"VE":[40,66,109,135,185,238],"systems":[41,136,186],"have":[42],"been":[43],"widely":[44],"adopted":[45],"in":[46,79,117,144,150,247],"many":[47],"downstream":[48],"tasks.":[49],"Metamorphic":[50,166],"testing":[51],"commonest":[54],"technique":[55],"for":[56,65,124,134],"AI":[57],"algorithms,":[58],"but":[59],"it":[60],"poses":[61],"significant":[63],"challenge":[64],"testing.":[67,137],"They":[68],"either":[69],"only":[70],"consider":[71],"perturbations":[72,96],"on":[73,97,161,182,199,218,249,257],"single":[74],"modality":[75],"which":[76,100,201,240],"would":[77],"result":[78],"ineffective":[80],"tests":[81,231,252],"due":[82],"destruction":[85],"relationship":[88],"image-text":[90],"pair,":[91],"or":[92],"just":[93],"conduct":[94],"shallow":[95],"inputs":[99],"can":[101],"hardly":[102],"detect":[103,196],"decision":[105],"error":[106],"made":[107],"systems.":[110],"Motivated":[111],"fact":[114],"that":[115,193],"objects":[116,174],"are":[120,168],"fundamental":[122],"element":[123],"reasoning,":[125],"we":[126,228],"propose":[127],"VEglue,":[128],"object-aligned":[130],"joint":[131],"erasing":[132],"approach":[133],"It":[138],"first":[139],"aligns":[140],"object":[142,148],"regions":[143],"premise":[146],"descriptions":[149],"identify":[154],"linked":[155],"un-linked":[157],"objects.":[158],"Then,":[159],"based":[160],"alignment":[163],"information,":[164],"three":[165],"Relations":[167],"designed":[169],"jointly":[171],"erase":[172],"two":[177,188],"modalities.":[178],"We":[179],"evaluate":[180],"VEglue":[181,194,210,234],"four":[183],"widely-used":[184],"involving":[187],"public":[189],"datasets.":[190],"Results":[191],"show":[192],"could":[195,211],"11,609":[197],"issues":[198],"average,":[200,219],"194%-2,846%":[203],"more":[204],"than":[205],"baselines.":[207],"In":[208],"addition,":[209],"reach":[212],"52.5%":[213],"Issue":[214],"Finding":[215],"Rate":[216],"(IFR)":[217],"significantly":[221],"outperform":[222],"baselines":[224],"17.1%-38.2%.":[226],"Furthermore,":[227],"leverage":[229],"generated":[232,251],"retrain":[236],"systems,":[239],"largely":[241],"improves":[242],"model":[243],"performance":[244],"(50.8%":[245],"increase":[246],"accuracy)":[248],"newly":[250],"without":[253],"sacrificing":[254],"accuracy":[256],"original":[259],"test":[260],"set.":[261]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
