{"id":"https://openalex.org/W3093624740","doi":"https://doi.org/10.18653/v1/2021.naacl-main.248","title":"SOrT-ing VQA Models : Contrastive Gradient Learning for Improved Consistency","display_name":"SOrT-ing VQA Models : Contrastive Gradient Learning for Improved Consistency","publication_year":2021,"publication_date":"2021-01-01","ids":{"openalex":"https://openalex.org/W3093624740","doi":"https://doi.org/10.18653/v1/2021.naacl-main.248","mag":"3093624740"},"language":"en","primary_location":{"id":"doi:10.18653/v1/2021.naacl-main.248","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2021.naacl-main.248","pdf_url":"https://aclanthology.org/2021.naacl-main.248.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2021.naacl-main.248.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5046750126","display_name":"Sameer Dharur","orcid":"https://orcid.org/0000-0002-7131-4539"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Sameer Dharur","raw_affiliation_strings":["Georgia Tech","Georgia Institute of Technology, Atlanta, United States"],"affiliations":[{"raw_affiliation_string":"Georgia Tech","institution_ids":["https://openalex.org/I130701444"]},{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, United States","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059781275","display_name":"Purva Tendulkar","orcid":null},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]},{"id":"https://openalex.org/I36258959","display_name":"University of California, San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Purva Tendulkar","raw_affiliation_strings":["Georgia Tech","University of California, San Diego","Georgia Institute of Technology, Atlanta, United States"],"affiliations":[{"raw_affiliation_string":"Georgia Tech","institution_ids":["https://openalex.org/I130701444"]},{"raw_affiliation_string":"University of California, San Diego","institution_ids":["https://openalex.org/I36258959"]},{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, United States","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014035752","display_name":"Dhruv Batra","orcid":null},"institutions":[{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]},{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["IL","US"],"is_corresponding":false,"raw_author_name":"Dhruv Batra","raw_affiliation_strings":["Facebook AI Research","Georgia Tech","Georgia Institute of Technology, Atlanta, United States"],"affiliations":[{"raw_affiliation_string":"Facebook AI Research","institution_ids":["https://openalex.org/I2252078561"]},{"raw_affiliation_string":"Georgia Tech","institution_ids":["https://openalex.org/I130701444"]},{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, United States","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050342343","display_name":"Devi Parikh","orcid":null},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]},{"id":"https://openalex.org/I2252078561","display_name":"Meta (Israel)","ror":"https://ror.org/02388em19","country_code":"IL","type":"company","lineage":["https://openalex.org/I2252078561","https://openalex.org/I4210114444"]}],"countries":["IL","US"],"is_corresponding":false,"raw_author_name":"Devi Parikh","raw_affiliation_strings":["Facebook AI Research","Georgia Tech","Meta (Israel), Tel Aviv, Israel"],"affiliations":[{"raw_affiliation_string":"Facebook AI Research","institution_ids":["https://openalex.org/I2252078561"]},{"raw_affiliation_string":"Georgia Tech","institution_ids":["https://openalex.org/I130701444"]},{"raw_affiliation_string":"Meta (Israel), Tel Aviv, Israel","institution_ids":["https://openalex.org/I2252078561"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046238088","display_name":"Ramprasaath R. Selvaraju","orcid":null},"institutions":[{"id":"https://openalex.org/I4210155268","display_name":"Salesforce (United States)","ror":"https://ror.org/057315g56","country_code":"US","type":"company","lineage":["https://openalex.org/I4210155268"]},{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ramprasaath R. Selvaraju","raw_affiliation_strings":["Georgia Tech","Salesforce Research","#N#\u2021#N#Georgia Institute of Technology#N#"],"affiliations":[{"raw_affiliation_string":"Georgia Tech","institution_ids":["https://openalex.org/I130701444"]},{"raw_affiliation_string":"Salesforce Research","institution_ids":["https://openalex.org/I4210155268"]},{"raw_affiliation_string":"#N#\u2021#N#Georgia Institute of Technology#N#","institution_ids":["https://openalex.org/I130701444"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5046750126"],"corresponding_institution_ids":["https://openalex.org/I130701444"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.00541219,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"3103","last_page":"3111"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.8930862545967102},{"id":"https://openalex.org/keywords/sort","display_name":"sort","score":0.8431013822555542},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.7045702338218689},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6591236591339111},{"id":"https://openalex.org/keywords/rank","display_name":"Rank (graph theory)","score":0.6391698718070984},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6003661751747131},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.5467891693115234},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4747127592563629},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.36423632502555847},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.2776799201965332},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.14525777101516724}],"concepts":[{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.8930862545967102},{"id":"https://openalex.org/C88548561","wikidata":"https://www.wikidata.org/wiki/Q347599","display_name":"sort","level":2,"score":0.8431013822555542},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.7045702338218689},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6591236591339111},{"id":"https://openalex.org/C164226766","wikidata":"https://www.wikidata.org/wiki/Q7293202","display_name":"Rank (graph theory)","level":2,"score":0.6391698718070984},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6003661751747131},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.5467891693115234},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4747127592563629},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.36423632502555847},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.2776799201965332},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.14525777101516724},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.18653/v1/2021.naacl-main.248","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2021.naacl-main.248","pdf_url":"https://aclanthology.org/2021.naacl-main.248.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2010.10038","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2010.10038","pdf_url":"https://arxiv.org/pdf/2010.10038","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null},{"id":"mag:3093624740","is_oa":true,"landing_page_url":"http://export.arxiv.org/pdf/2010.10038","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.2010.10038","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2010.10038","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.18653/v1/2021.naacl-main.248","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2021.naacl-main.248","pdf_url":"https://aclanthology.org/2021.naacl-main.248.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3093624740.pdf","grobid_xml":"https://content.openalex.org/works/W3093624740.grobid-xml"},"referenced_works_count":16,"referenced_works":["https://openalex.org/W2273038706","https://openalex.org/W2521737809","https://openalex.org/W2759653627","https://openalex.org/W2771951981","https://openalex.org/W2788527488","https://openalex.org/W2944668088","https://openalex.org/W2950761309","https://openalex.org/W2953016680","https://openalex.org/W2962685807","https://openalex.org/W2963224792","https://openalex.org/W2973009097","https://openalex.org/W2982699810","https://openalex.org/W2983256121","https://openalex.org/W3001468315","https://openalex.org/W3037725825","https://openalex.org/W3102564565"],"related_works":["https://openalex.org/W3167665045","https://openalex.org/W3206082179","https://openalex.org/W88605193","https://openalex.org/W3176825161","https://openalex.org/W2891375654","https://openalex.org/W3154599769","https://openalex.org/W3206218097","https://openalex.org/W3191230274","https://openalex.org/W3212279921","https://openalex.org/W3116422100","https://openalex.org/W3039185146","https://openalex.org/W3181057974","https://openalex.org/W3210616652","https://openalex.org/W3086695003","https://openalex.org/W2889107415","https://openalex.org/W3120569501","https://openalex.org/W2799755555","https://openalex.org/W3014193764","https://openalex.org/W2952828155","https://openalex.org/W2949030303"],"abstract_inverted_index":{"Recent":[0],"research":[1],"in":[2,14,43],"Visual":[3],"Question":[4],"Answering":[5],"(VQA)":[6],"has":[7],"revealed":[8],"state-of-the-art":[9],"models":[10,47,91,122],"to":[11,38,51,54,71,88,95,101,123],"be":[12,52],"inconsistent":[13],"their":[15,93],"understanding":[16],"of":[17],"the":[18,44,56,73,79,97],"world":[19],"--":[20],"they":[21],"answer":[22,55,102],"seemingly":[23],"difficult":[24],"questions":[25,74,130],"requiring":[26],"reasoning":[27,80,104],"correctly":[28],"but":[29],"get":[30],"simpler":[31],"associated":[32],"sub-questions":[33,36,99,126],"wrong.":[34],"These":[35],"pertain":[37],"lower":[39],"level":[40,58],"visual":[41,151],"concepts":[42],"image":[45],"that":[46,136],"ideally":[48],"should":[49],"understand":[50],"able":[53],"higher":[57,127],"question":[59,81],"correctly.":[60],"To":[61],"address":[62],"this,":[63],"we":[64,107],"first":[65],"present":[66],"a":[67,103,109],"gradient-based":[68],"interpretability":[69],"approach":[70,114],"determine":[72],"most":[75],"strongly":[76],"correlated":[77],"with":[78],"on":[82,92],"an":[83,132],"image,":[84],"and":[85],"use":[86],"this":[87],"evaluate":[89],"VQA":[90],"ability":[94],"identify":[96],"relevant":[98,125],"needed":[100],"question.":[105],"Next,":[106],"propose":[108],"contrastive":[110],"gradient":[111],"learning":[112],"based":[113],"called":[115],"Sub-question":[116],"Oriented":[117],"Tuning":[118],"(SOrT)":[119],"which":[120],"encourages":[121],"rank":[124],"than":[128],"irrelevant":[129],"for":[131],"pair.":[133],"We":[134],"show":[135],"SOrT":[137],"improves":[138],"model":[139],"consistency":[140],"by":[141],"upto":[142],"6.5%":[143],"points":[144],"over":[145],"existing":[146],"baselines,":[147],"while":[148],"also":[149],"improving":[150],"grounding.":[152]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
