{"id":"https://openalex.org/W4288055855","doi":"https://doi.org/10.1145/3503161.3548284","title":"Chunk-aware Alignment and Lexical Constraint for Visual Entailment with Natural Language Explanations","display_name":"Chunk-aware Alignment and Lexical Constraint for Visual Entailment with Natural Language Explanations","publication_year":2022,"publication_date":"2022-10-10","ids":{"openalex":"https://openalex.org/W4288055855","doi":"https://doi.org/10.1145/3503161.3548284"},"language":"en","primary_location":{"id":"doi:10.1145/3503161.3548284","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3503161.3548284","pdf_url":null,"source":{"id":"https://openalex.org/S4363608757","display_name":"Proceedings of the 30th ACM International Conference on Multimedia","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2207.11401","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100379380","display_name":"Qian Yang","orcid":"https://orcid.org/0000-0003-2304-9408"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Qian Yang","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019703861","display_name":"Yunxin Li","orcid":"https://orcid.org/0000-0003-4819-2489"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunxin Li","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083079672","display_name":"Baotian Hu","orcid":"https://orcid.org/0000-0001-7490-684X"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Baotian Hu","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017116858","display_name":"Lin Ma","orcid":"https://orcid.org/0000-0002-7331-6132"},"institutions":[{"id":"https://openalex.org/I4210087373","display_name":"Meizu (China)","ror":"https://ror.org/0067g4302","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210087373"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lin Ma","raw_affiliation_strings":["Meituan, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Meituan, Beijing, China","institution_ids":["https://openalex.org/I4210087373"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026429562","display_name":"Yuxin Ding","orcid":"https://orcid.org/0000-0001-9616-4461"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuxin Ding","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100402911","display_name":"Min Zhang","orcid":"https://orcid.org/0000-0002-3895-5510"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Min Zhang","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100379380"],"corresponding_institution_ids":["https://openalex.org/I204983213"],"apc_list":null,"apc_paid":null,"fwci":0.6581,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.77984386,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"3587","last_page":"3597"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9861999750137329,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8348580598831177},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.7221081256866455},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6588559746742249},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6107858419418335},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.5916716456413269},{"id":"https://openalex.org/keywords/logical-consequence","display_name":"Logical consequence","score":0.5583862662315369},{"id":"https://openalex.org/keywords/relation","display_name":"Relation (database)","score":0.5133744478225708},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.5068898797035217},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.5018024444580078},{"id":"https://openalex.org/keywords/generator","display_name":"Generator (circuit theory)","score":0.5002930164337158},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.4664594233036041}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8348580598831177},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.7221081256866455},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6588559746742249},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6107858419418335},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.5916716456413269},{"id":"https://openalex.org/C134752490","wikidata":"https://www.wikidata.org/wiki/Q374182","display_name":"Logical consequence","level":2,"score":0.5583862662315369},{"id":"https://openalex.org/C25343380","wikidata":"https://www.wikidata.org/wiki/Q277521","display_name":"Relation (database)","level":2,"score":0.5133744478225708},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.5068898797035217},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.5018024444580078},{"id":"https://openalex.org/C2780992000","wikidata":"https://www.wikidata.org/wiki/Q17016113","display_name":"Generator (circuit theory)","level":3,"score":0.5002930164337158},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.4664594233036041},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3503161.3548284","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3503161.3548284","pdf_url":null,"source":{"id":"https://openalex.org/S4363608757","display_name":"Proceedings of the 30th ACM International Conference on Multimedia","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM International Conference on Multimedia","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2207.11401","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2207.11401","pdf_url":"https://arxiv.org/pdf/2207.11401","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2207.11401","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2207.11401","pdf_url":"https://arxiv.org/pdf/2207.11401","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"score":0.7900000214576721,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":46,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1522301498","https://openalex.org/W1575833922","https://openalex.org/W1773149199","https://openalex.org/W1861492603","https://openalex.org/W1956340063","https://openalex.org/W2123301721","https://openalex.org/W2123442489","https://openalex.org/W2154652894","https://openalex.org/W2185175083","https://openalex.org/W2250378130","https://openalex.org/W2506483933","https://openalex.org/W2560730294","https://openalex.org/W2606974598","https://openalex.org/W2745461083","https://openalex.org/W2788527488","https://openalex.org/W2886970679","https://openalex.org/W2896457183","https://openalex.org/W2912371042","https://openalex.org/W2938704169","https://openalex.org/W2951299559","https://openalex.org/W2951936329","https://openalex.org/W2962749469","https://openalex.org/W2962858109","https://openalex.org/W2963115613","https://openalex.org/W2963383024","https://openalex.org/W2963514444","https://openalex.org/W2963609017","https://openalex.org/W2973009097","https://openalex.org/W3010535384","https://openalex.org/W3035605030","https://openalex.org/W3090449556","https://openalex.org/W3091588028","https://openalex.org/W3102564565","https://openalex.org/W3105391665","https://openalex.org/W3119334387","https://openalex.org/W3161801106","https://openalex.org/W3184784418","https://openalex.org/W3193402170","https://openalex.org/W3195986544","https://openalex.org/W3206477365","https://openalex.org/W4205712089","https://openalex.org/W4206070857","https://openalex.org/W4221166856","https://openalex.org/W4313046229","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W4388335561","https://openalex.org/W2970530566","https://openalex.org/W4288261899","https://openalex.org/W4307309205","https://openalex.org/W2967478618","https://openalex.org/W4385009901","https://openalex.org/W4385572700","https://openalex.org/W2997152889","https://openalex.org/W4387768015","https://openalex.org/W4309703372"],"abstract_inverted_index":{"Visual":[0],"Entailment":[1],"with":[2],"natural":[3],"language":[4,40,155],"explanations":[5,104],"aims":[6],"to":[7,19,33,42,108,160,172,184],"infer":[8],"the":[9,21,35,44,48,62,67,80,86,94,102,150,174,187,193,200,205],"relationship":[10],"between":[11,56,66],"a":[12,17,29,39,117,131,137,141],"text-image":[13],"pair":[14],"and":[15,38,58,70,121,140,156,176,202,214,228],"generate":[16,43],"sentence":[18,151],"explain":[20],"decision-making":[22,96],"process.":[23],"Previous":[24],"methods":[25],"rely":[26],"mainly":[27,52],"on":[28,85,211,225],"pre-trained":[30,49],"vision-language":[31,50,77,178],"model":[32,41],"perform":[34],"relation":[36,99,138,194],"inference":[37,226],"corresponding":[45],"explanation.":[46],"However,":[47],"models":[51,224],"build":[53,161],"token-level":[54,175],"alignment":[55,65],"text":[57],"image":[59,158],"yet":[60],"ignore":[61],"high-level":[63],"semantic":[64,163],"phrases":[68],"(chunks)":[69],"visual":[71],"contents,":[72],"which":[73],"is":[74],"critical":[75,95],"for":[76],"reasoning.":[78,110],"Moreover,":[79],"explanation":[81,197],"generator":[82],"based":[83,124],"only":[84],"encoded":[87],"joint":[88],"representation":[89],"does":[90],"not":[91],"explicitly":[92],"consider":[93],"points":[97],"of":[98,204,230],"inference.":[100],"Thus":[101],"generated":[103,231],"are":[105],"less":[106],"faithful":[107],"visual-language":[109],"To":[111],"mitigate":[112],"these":[113],"problems,":[114],"we":[115],"propose":[116],"unified":[118],"Chunk-aware":[119,132],"Alignment":[120],"Lexical":[122,142],"Constraint":[123],"method,":[125],"dubbed":[126],"as":[127],"CALeC.":[128],"It":[129],"contains":[130],"Semantic":[133],"Interactor":[134],"(arr.":[135,145],"CSI),":[136],"inferrer,":[139],"Constraint-aware":[143],"Generator":[144],"LeCG).":[146],"Specifically,":[147],"CSI":[148],"exploits":[149],"structure":[152],"inherent":[153],"in":[154],"various":[157],"regions":[159],"chunk-aware":[162],"alignment.":[164],"Relation":[165],"inferrer":[166,195],"uses":[167],"an":[168],"attention-based":[169],"reasoning":[170],"network":[171],"incorporate":[173,186],"chunk-level":[177],"representations.":[179],"LeCG":[180],"utilizes":[181],"lexical":[182],"constraints":[183],"expressly":[185],"words":[188],"or":[189],"chunks":[190],"focused":[191],"by":[192],"into":[196],"generation,":[198],"improving":[199],"faithfulness":[201],"informativeness":[203],"explanations.":[206,232],"We":[207],"conduct":[208],"extensive":[209],"experiments":[210],"three":[212],"datasets,":[213],"experimental":[215],"results":[216],"indicate":[217],"that":[218],"CALeC":[219],"significantly":[220],"outperforms":[221],"other":[222],"competitor":[223],"accuracy":[227],"quality":[229]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":5}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
