{"id":"https://openalex.org/W2952372688","doi":"https://doi.org/10.18653/v1/p19-1351","title":"Improving Visual Question Answering by Referring to Generated Paragraph Captions","display_name":"Improving Visual Question Answering by Referring to Generated Paragraph Captions","publication_year":2019,"publication_date":"2019-01-01","ids":{"openalex":"https://openalex.org/W2952372688","doi":"https://doi.org/10.18653/v1/p19-1351","mag":"2952372688"},"language":"en","primary_location":{"id":"doi:10.18653/v1/p19-1351","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/p19-1351","pdf_url":"https://www.aclweb.org/anthology/P19-1351.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/P19-1351.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5041514597","display_name":"Hyounghun Kim","orcid":null},"institutions":[{"id":"https://openalex.org/I114027177","display_name":"University of North Carolina at Chapel Hill","ror":"https://ror.org/0130frc33","country_code":"US","type":"education","lineage":["https://openalex.org/I114027177"]},{"id":"https://openalex.org/I1333535994","display_name":"University of North Carolina Health Care","ror":"https://ror.org/00qz24g20","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1333535994"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Hyounghun Kim","raw_affiliation_strings":["UNC Chapel Hill"],"affiliations":[{"raw_affiliation_string":"UNC Chapel Hill","institution_ids":["https://openalex.org/I1333535994","https://openalex.org/I114027177"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001987532","display_name":"Mohit Bansal","orcid":"https://orcid.org/0000-0001-5522-1351"},"institutions":[{"id":"https://openalex.org/I114027177","display_name":"University of North Carolina at Chapel Hill","ror":"https://ror.org/0130frc33","country_code":"US","type":"education","lineage":["https://openalex.org/I114027177"]},{"id":"https://openalex.org/I1333535994","display_name":"University of North Carolina Health Care","ror":"https://ror.org/00qz24g20","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1333535994"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mohit Bansal","raw_affiliation_strings":["UNC Chapel Hill"],"affiliations":[{"raw_affiliation_string":"UNC Chapel Hill","institution_ids":["https://openalex.org/I1333535994","https://openalex.org/I114027177"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5041514597"],"corresponding_institution_ids":["https://openalex.org/I114027177","https://openalex.org/I1333535994"],"apc_list":null,"apc_paid":null,"fwci":1.839,"has_fulltext":true,"cited_by_count":24,"citation_normalized_percentile":{"value":0.8867065,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"3606","last_page":"3612"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9926999807357788,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9896000027656555,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/paragraph","display_name":"Paragraph","score":0.9343340992927551},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.8434828519821167},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8001707196235657},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.6656697392463684},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.630199670791626},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6161108016967773},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5342330932617188},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5121216773986816},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5049424767494202},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.43158209323883057}],"concepts":[{"id":"https://openalex.org/C2777206241","wikidata":"https://www.wikidata.org/wiki/Q194431","display_name":"Paragraph","level":2,"score":0.9343340992927551},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.8434828519821167},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8001707196235657},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.6656697392463684},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.630199670791626},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6161108016967773},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5342330932617188},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5121216773986816},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5049424767494202},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.43158209323883057},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/p19-1351","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/p19-1351","pdf_url":"https://www.aclweb.org/anthology/P19-1351.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/p19-1351","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/p19-1351","pdf_url":"https://www.aclweb.org/anthology/P19-1351.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.4300000071525574,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G6697946165","display_name":null,"funder_award_id":"1840131","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7452299184","display_name":null,"funder_award_id":"W911NF","funder_id":"https://openalex.org/F4320338281","funder_display_name":"Army Research Office"},{"id":"https://openalex.org/G8998121839","display_name":null,"funder_award_id":"911NF","funder_id":"https://openalex.org/F4320338281","funder_display_name":"Army Research Office"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320338281","display_name":"Army Research Office","ror":"https://ror.org/05epdh915"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2952372688.pdf","grobid_xml":"https://content.openalex.org/works/W2952372688.grobid-xml"},"referenced_works_count":31,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1522301498","https://openalex.org/W1895641373","https://openalex.org/W1933349210","https://openalex.org/W1956340063","https://openalex.org/W2157331557","https://openalex.org/W2250539671","https://openalex.org/W2277195237","https://openalex.org/W2412400526","https://openalex.org/W2463565445","https://openalex.org/W2481240925","https://openalex.org/W2507009361","https://openalex.org/W2549599535","https://openalex.org/W2551396370","https://openalex.org/W2560730294","https://openalex.org/W2563399268","https://openalex.org/W2605045867","https://openalex.org/W2613718673","https://openalex.org/W2745461083","https://openalex.org/W2890781596","https://openalex.org/W2951684117","https://openalex.org/W2962749469","https://openalex.org/W2963084599","https://openalex.org/W2963150162","https://openalex.org/W2963383024","https://openalex.org/W2963656855","https://openalex.org/W2963758027","https://openalex.org/W2963954913","https://openalex.org/W2964121744","https://openalex.org/W3016211260","https://openalex.org/W4298392976"],"related_works":["https://openalex.org/W2377059580","https://openalex.org/W127000293","https://openalex.org/W2391800119","https://openalex.org/W2595239241","https://openalex.org/W2799181378","https://openalex.org/W2052919063","https://openalex.org/W3003711649","https://openalex.org/W2904173691","https://openalex.org/W4365517254","https://openalex.org/W270947280"],"abstract_inverted_index":{"Paragraph-style":[0],"image":[1,8,36,56],"captions":[2,16,28],"describe":[3],"diverse":[4],"aspects":[5],"of":[6,23,34,151,168],"an":[7,20,161,183],"as":[9,40,110,115,117],"opposed":[10],"to":[11,137,164],"the":[12,24,35,55,82,88,118,123,133,149,166,200,206],"more":[13,62,66,190],"common":[14],"single-sentence":[15],"that":[17,76,175],"only":[18],"provide":[19],"abstract":[21,63],"description":[22],"image.":[25],"These":[26],"paragraph":[27,113,176],"can":[29,59,77],"hence":[30],"contain":[31],"substantial":[32],"information":[33,47,52,70,140],"for":[37],"tasks":[38],"such":[39],"visual":[41,51,191],"question":[42,84,125],"answering.":[43],"Moreover,":[44],"this":[45],"textual":[46,83,89],"is":[48],"complementary":[49],"with":[50,81],"present":[53],"in":[54,148],"because":[57],"it":[58],"discuss":[60],"both":[61,128],"concepts":[64],"and":[65,74,85,102,121,155],"explicit,":[67],"intermediate":[68],"symbolic":[69],"about":[71],"objects,":[72],"events,":[73],"scenes":[75],"directly":[78],"be":[79],"matched":[80],"copied":[86],"into":[87],"answer":[90,189],"(i.e.,":[91],"via":[92],"easier":[93],"modality":[94],"match).":[95],"Hence,":[96],"we":[97],"propose":[98],"a":[99,112,210],"combined":[100],"Visual":[101,201],"Textual":[103],"Question":[104],"Answering":[105],"(VTQA)":[106],"model":[107],"which":[108],"takes":[109],"input":[111],"caption":[114],"well":[116],"corresponding":[119],"image,":[120],"answers":[122,158],"given":[124,160],"based":[126],"on":[127,199],"inputs.":[129],"In":[130],"our":[131,194],"model,":[132,196],"inputs":[134],"are":[135,159],"fused":[136,146],"extract":[138],"related":[139],"by":[141],"cross-attention":[142],"(early":[143],"fusion),":[144,154],"then":[145],"again":[147],"form":[150],"consensus":[152],"(late":[153],"finally":[156],"expected":[157],"extra":[162],"score":[163],"enhance":[165],"chance":[167],"selection":[169],"(later":[170],"fusion).":[171],"Empirical":[172],"results":[173],"show":[174],"captions,":[177],"even":[178],"when":[179,197],"automatically":[180],"generated":[181],"(via":[182],"RL-based":[184],"encoderdecoder":[185],"model),":[186],"help":[187],"correctly":[188],"questions.":[192],"Overall,":[193],"joint":[195],"trained":[198],"Genome":[202],"dataset,":[203],"significantly":[204],"improves":[205],"VQA":[207],"performance":[208],"over":[209],"strong":[211],"baseline":[212],"model.":[213]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":6},{"year":2020,"cited_by_count":6}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
