{"id":"https://openalex.org/W4390970329","doi":"https://doi.org/10.1109/tmm.2024.3355638","title":"Learning to Supervise Knowledge Retrieval Over a Tree Structure for Visual Question Answering","display_name":"Learning to Supervise Knowledge Retrieval Over a Tree Structure for Visual Question Answering","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4390970329","doi":"https://doi.org/10.1109/tmm.2024.3355638"},"language":"en","primary_location":{"id":"doi:10.1109/tmm.2024.3355638","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2024.3355638","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5054900679","display_name":"Ning Xu","orcid":"https://orcid.org/0000-0002-7526-4356"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ning Xu","raw_affiliation_strings":["School of Electrical and Information Engineering, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"School of Electrical and Information Engineering, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064962047","display_name":"Zimu Lu","orcid":"https://orcid.org/0000-0001-8479-2807"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zimu Lu","raw_affiliation_strings":["School of Electrical and Information Engineering, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"School of Electrical and Information Engineering, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019994406","display_name":"Hongshuo Tian","orcid":"https://orcid.org/0000-0001-7635-0961"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongshuo Tian","raw_affiliation_strings":["School of Electrical and Information Engineering, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"School of Electrical and Information Engineering, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062258244","display_name":"Rongbao Kang","orcid":"https://orcid.org/0000-0001-8487-8520"},"institutions":[{"id":"https://openalex.org/I2800372957","display_name":"China Electronics Technology Group Corporation","ror":"https://ror.org/0098hst83","country_code":"CN","type":"company","lineage":["https://openalex.org/I2800372957"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rongbao Kang","raw_affiliation_strings":["30th Research Institute of China Electronics Technology Corporation, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"30th Research Institute of China Electronics Technology Corporation, Chengdu, China","institution_ids":["https://openalex.org/I2800372957"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jinbo Cao","orcid":"https://orcid.org/0009-0003-5037-6989"},"institutions":[{"id":"https://openalex.org/I2800372957","display_name":"China Electronics Technology Group Corporation","ror":"https://ror.org/0098hst83","country_code":"CN","type":"company","lineage":["https://openalex.org/I2800372957"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinbo Cao","raw_affiliation_strings":["30th Research Institute of China Electronics Technology Corporation, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"30th Research Institute of China Electronics Technology Corporation, Chengdu, China","institution_ids":["https://openalex.org/I2800372957"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046305086","display_name":"Yongdong Zhang","orcid":"https://orcid.org/0000-0002-1151-1792"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongdong Zhang","raw_affiliation_strings":["University of Science and Technology of China, Hefei, China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5081485810","display_name":"An-An Liu","orcid":"https://orcid.org/0000-0001-5755-9145"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"An-An Liu","raw_affiliation_strings":["School of Electrical and Information Engineering, Tianjin University, Tianjin, China"],"affiliations":[{"raw_affiliation_string":"School of Electrical and Information Engineering, Tianjin University, Tianjin, China","institution_ids":["https://openalex.org/I162868743"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5054900679"],"corresponding_institution_ids":["https://openalex.org/I162868743"],"apc_list":null,"apc_paid":null,"fwci":0.7268,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.68080017,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":"26","issue":null,"first_page":"6689","last_page":"6700"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9886999726295471,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8396360874176025},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.8313775062561035},{"id":"https://openalex.org/keywords/knowledge-extraction","display_name":"Knowledge extraction","score":0.6761291027069092},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6353002190589905},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5816549062728882},{"id":"https://openalex.org/keywords/domain-knowledge","display_name":"Domain knowledge","score":0.5235397815704346},{"id":"https://openalex.org/keywords/knowledge-based-systems","display_name":"Knowledge-based systems","score":0.5102741718292236},{"id":"https://openalex.org/keywords/knowledge-base","display_name":"Knowledge base","score":0.5054073333740234},{"id":"https://openalex.org/keywords/commonsense-knowledge","display_name":"Commonsense knowledge","score":0.503084123134613},{"id":"https://openalex.org/keywords/general-knowledge","display_name":"General knowledge","score":0.5009777545928955},{"id":"https://openalex.org/keywords/tree-structure","display_name":"Tree structure","score":0.4111831486225128},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.40919703245162964},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.362448513507843},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.11691007018089294}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8396360874176025},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.8313775062561035},{"id":"https://openalex.org/C120567893","wikidata":"https://www.wikidata.org/wiki/Q1582085","display_name":"Knowledge extraction","level":2,"score":0.6761291027069092},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6353002190589905},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5816549062728882},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.5235397815704346},{"id":"https://openalex.org/C115925183","wikidata":"https://www.wikidata.org/wiki/Q1412694","display_name":"Knowledge-based systems","level":2,"score":0.5102741718292236},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.5054073333740234},{"id":"https://openalex.org/C30542707","wikidata":"https://www.wikidata.org/wiki/Q1603203","display_name":"Commonsense knowledge","level":3,"score":0.503084123134613},{"id":"https://openalex.org/C49929091","wikidata":"https://www.wikidata.org/wiki/Q1930471","display_name":"General knowledge","level":2,"score":0.5009777545928955},{"id":"https://openalex.org/C163797641","wikidata":"https://www.wikidata.org/wiki/Q2067937","display_name":"Tree structure","level":3,"score":0.4111831486225128},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40919703245162964},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.362448513507843},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.11691007018089294},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tmm.2024.3355638","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tmm.2024.3355638","pdf_url":null,"source":{"id":"https://openalex.org/S137030581","display_name":"IEEE Transactions on Multimedia","issn_l":"1520-9210","issn":["1520-9210","1941-0077"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Multimedia","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5299999713897705,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G173205433","display_name":null,"funder_award_id":"U21B2024","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7474616114","display_name":null,"funder_award_id":"62002257","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":73,"referenced_works":["https://openalex.org/W102708294","https://openalex.org/W639708223","https://openalex.org/W1861492603","https://openalex.org/W1933349210","https://openalex.org/W2016089260","https://openalex.org/W2080133951","https://openalex.org/W2083897630","https://openalex.org/W2150593711","https://openalex.org/W2525778437","https://openalex.org/W2561529111","https://openalex.org/W2745461083","https://openalex.org/W2747623286","https://openalex.org/W2760103357","https://openalex.org/W2947312908","https://openalex.org/W2963477107","https://openalex.org/W2963717374","https://openalex.org/W2963938081","https://openalex.org/W2964303913","https://openalex.org/W2966317026","https://openalex.org/W2966683369","https://openalex.org/W2969876226","https://openalex.org/W2970231061","https://openalex.org/W2997463623","https://openalex.org/W3004349648","https://openalex.org/W3035454069","https://openalex.org/W3044175177","https://openalex.org/W3087338569","https://openalex.org/W3087975588","https://openalex.org/W3091588028","https://openalex.org/W3093006710","https://openalex.org/W3093200502","https://openalex.org/W3101703188","https://openalex.org/W3115476810","https://openalex.org/W3139224848","https://openalex.org/W3172845486","https://openalex.org/W3173038784","https://openalex.org/W3189601601","https://openalex.org/W3195963939","https://openalex.org/W3199693760","https://openalex.org/W3203354307","https://openalex.org/W4221155360","https://openalex.org/W4224440661","https://openalex.org/W4226078866","https://openalex.org/W4226094317","https://openalex.org/W4226321975","https://openalex.org/W4280590775","https://openalex.org/W4285605365","https://openalex.org/W4304092583","https://openalex.org/W4309674289","https://openalex.org/W4312971273","https://openalex.org/W4318718936","https://openalex.org/W4319069017","https://openalex.org/W4367147048","https://openalex.org/W4375802461","https://openalex.org/W4380353763","https://openalex.org/W4382119268","https://openalex.org/W4385245566","https://openalex.org/W4385570290","https://openalex.org/W4386065596","https://openalex.org/W4390872859","https://openalex.org/W6631190155","https://openalex.org/W6727690538","https://openalex.org/W6752083267","https://openalex.org/W6754375721","https://openalex.org/W6766904570","https://openalex.org/W6767211374","https://openalex.org/W6789753369","https://openalex.org/W6791353385","https://openalex.org/W6838329711","https://openalex.org/W6838989559","https://openalex.org/W6839015040","https://openalex.org/W6849177959","https://openalex.org/W6852874933"],"related_works":["https://openalex.org/W1979978247","https://openalex.org/W1520100787","https://openalex.org/W3148901273","https://openalex.org/W2357854711","https://openalex.org/W2904591968","https://openalex.org/W4287642521","https://openalex.org/W2405862915","https://openalex.org/W3174464510","https://openalex.org/W4378501473","https://openalex.org/W2508944927"],"abstract_inverted_index":{"Knowledge-based":[0],"visual":[1],"question":[2],"answering":[3],"(KBVQA)":[4],"aims":[5],"to":[6,14,26,45,67,88,99,123,153,163,188],"retrieve":[7],"the":[8,52,68,101,114,125,132,140,146,150,155,160,177,194,197],"external":[9],"knowledge":[10,24,47,79,104,121,129,137,141,156,169,182],"out":[11],"of":[12,70,103,134,168,186,196],"images":[13,44],"answer":[15,83,172],"questions.":[16],"However,":[17],"current":[18],"methods":[19,33,74],"always":[20],"introduce":[21],"various":[22],"irrelevant":[23],"due":[25],"two":[27,95],"drawbacks:":[28],"(1)":[29],"Synonymy":[30],"issue.":[31,65],"Existing":[32],"heavily":[34],"rely":[35],"on":[36,136,200],"words":[37],"from":[38,48],"questions":[39],"or":[40],"object":[41],"labels":[42],"in":[43],"match":[46],"databases,":[49],"which":[50,78,85,158],"disregards":[51],"same":[53],"word":[54],"may":[55],"hold":[56],"multiple":[57],"meanings":[58],"within":[59],"different":[60],"contexts.":[61],"(2)":[62],"Knowledge":[63],"uncertainty":[64,142],"Due":[66],"absence":[69],"supervisory":[71],"signals,":[72],"recent":[73],"can":[75,86],"not":[76],"determine":[77],"is":[80],"applicable":[81],"for":[82,110,171],"inference,":[84],"mislead":[87],"admit":[89],"useless":[90],"knowledge.":[91],"To":[92],"address":[93],"these":[94],"problems,":[96],"we":[97,117,144,175],"propose":[98],"supervise":[100,154],"process":[102],"retrieval":[105,147],"over":[106],"a":[107,119,184],"tree":[108,122],"structure":[109],"KB-VQA":[111],"task.":[112],"For":[113,139],"synonymy":[115],"issue,":[116,143],"construct":[118],"hierarchical":[120],"capture":[124],"subordination":[126],"information":[127],"between":[128],"facts,":[130],"mitigating":[131],"impact":[133],"synonyms":[135],"retrieval.":[138],"use":[145],"history":[148],"as":[149],"ground":[151],"truth":[152],"retrieval,":[157],"facilitates":[159],"QA":[161],"model":[162],"form":[164],"an":[165],"explicit":[166],"path":[167],"facts":[170],"understanding.":[173],"Finally,":[174],"integrate":[176],"image,":[178],"question,":[179],"and":[180,203],"retrieved":[181],"into":[183],"variant":[185],"transformer":[187],"predict":[189],"answers.":[190],"Experimental":[191],"results":[192],"validate":[193],"effectiveness":[195],"proposed":[198],"method":[199],"KR-VQA,":[201],"OK-VQA":[202],"VQA":[204],"v2":[205],"datasets.":[206]},"counts_by_year":[{"year":2025,"cited_by_count":3}],"updated_date":"2026-04-16T08:26:57.006410","created_date":"2025-10-10T00:00:00"}
