{"id":"https://openalex.org/W4385822375","doi":"https://doi.org/10.21437/interspeech.2023-539","title":"Enhancing Visual Question Answering via Deconstructing Questions and Explicating Answers","display_name":"Enhancing Visual Question Answering via Deconstructing Questions and Explicating Answers","publication_year":2023,"publication_date":"2023-08-14","ids":{"openalex":"https://openalex.org/W4385822375","doi":"https://doi.org/10.21437/interspeech.2023-539"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2023-539","is_oa":true,"landing_page_url":"https://doi.org/10.21437/interspeech.2023-539","pdf_url":"https://www.isca-archive.org/interspeech_2023/chen23f_interspeech.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"INTERSPEECH 2023","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.isca-archive.org/interspeech_2023/chen23f_interspeech.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5104296705","display_name":"Feilong Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Feilong Chen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027728339","display_name":"Minglun Han","orcid":"https://orcid.org/0000-0002-5120-069X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Minglun Han","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075879347","display_name":"Jing Shi","orcid":"https://orcid.org/0000-0003-1147-0975"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jing Shi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101188547","display_name":"Shuang Xu","orcid":"https://orcid.org/0009-0000-3293-4391"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shuang Xu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5101771543","display_name":"Bo Xu","orcid":"https://orcid.org/0000-0001-5389-8131"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bo Xu","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5104296705"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.08911564,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"3447","last_page":"3451"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9944999814033508,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9944999814033508,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9825000166893005,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10636","display_name":"Innovative Teaching and Learning Methods","score":0.9214000105857849,"subfield":{"id":"https://openalex.org/subfields/3204","display_name":"Developmental and Educational Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.814210832118988},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5042833089828491},{"id":"https://openalex.org/keywords/epistemology","display_name":"Epistemology","score":0.48808467388153076},{"id":"https://openalex.org/keywords/cognitive-science","display_name":"Cognitive science","score":0.3542194962501526},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.34288111329078674},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.29724299907684326},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.17128905653953552}],"concepts":[{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.814210832118988},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5042833089828491},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.48808467388153076},{"id":"https://openalex.org/C188147891","wikidata":"https://www.wikidata.org/wiki/Q147638","display_name":"Cognitive science","level":1,"score":0.3542194962501526},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.34288111329078674},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.29724299907684326},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.17128905653953552}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2023-539","is_oa":true,"landing_page_url":"https://doi.org/10.21437/interspeech.2023-539","pdf_url":"https://www.isca-archive.org/interspeech_2023/chen23f_interspeech.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"INTERSPEECH 2023","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.21437/interspeech.2023-539","is_oa":true,"landing_page_url":"https://doi.org/10.21437/interspeech.2023-539","pdf_url":"https://www.isca-archive.org/interspeech_2023/chen23f_interspeech.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"INTERSPEECH 2023","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.5699999928474426,"display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4385822375.pdf","grobid_xml":"https://content.openalex.org/works/W4385822375.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2384605597","https://openalex.org/W2387743295","https://openalex.org/W2115758952","https://openalex.org/W3082787378","https://openalex.org/W2136007095","https://openalex.org/W2366230879","https://openalex.org/W3208425359","https://openalex.org/W2349927912"],"abstract_inverted_index":{"A":[0],"compositional":[1,21,31,69],"question":[2,6,59,123],"refers":[3],"to":[4,23,56,73,95,100,103],"a":[5,30,52],"that":[7],"involves":[8],"multiple":[9],"visual":[10,58,122],"objects,":[11],"as":[12,14],"well":[13,28],"their":[15],"attributes":[16],"and":[17,41,64,86,106,130],"relationships,":[18],"which":[19,119],"requires":[20],"reasoning":[22,39,128],"answer.Existing":[24],"VQA":[25],"models":[26],"can":[27,36,120],"answer":[29,45,104],"question,":[32],"but":[33],"few":[34],"works":[35],"give":[37,107],"the":[38,97,112],"process":[40],"explain":[42],"why":[43],"this":[44,48],"is":[46],"given.In":[47],"paper,":[49],"we":[50],"propose":[51],"novel":[53],"model":[54],"(DEEX)":[55],"enhance":[57,121],"answering":[60,68,124],"via":[61],"DEconstructing":[62],"questions":[63,105],"EXplicating":[65],"answers":[66],"when":[67],"questions.Specifically,":[70],"DEEX":[71,99],"aims":[72],"accomplish":[74],"three":[75],"sub-tasks:":[76],"(1)":[77],"Compositional":[78],"Question":[79,83],"Answering":[80],"(CQA),":[81],"(2)":[82],"Deconstructing":[84],"(QD),":[85],"(3)":[87],"Answer":[88],"Explicating":[89],"(AE).We":[90],"utilize":[91],"prompt-based":[92],"multi-task":[93],"learning":[94],"train":[96],"proposed":[98],"be":[101],"able":[102],"explanations":[108],"simultaneously.Experimental":[109],"results":[110],"on":[111],"GQA":[113],"dataset":[114],"demonstrate":[115],"our":[116],"method's":[117],"effectiveness,":[118],"by":[125],"giving":[126],"corresponding":[127],"processes":[129],"explanations.":[131]},"counts_by_year":[],"updated_date":"2026-03-11T06:11:40.159057","created_date":"2025-10-10T00:00:00"}
