{"id":"https://openalex.org/W4296079526","doi":"https://doi.org/10.1109/tcsvt.2022.3207228","title":"Heterogeneous Knowledge Network for Visual Dialog","display_name":"Heterogeneous Knowledge Network for Visual Dialog","publication_year":2022,"publication_date":"2022-09-16","ids":{"openalex":"https://openalex.org/W4296079526","doi":"https://doi.org/10.1109/tcsvt.2022.3207228"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2022.3207228","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2022.3207228","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101749819","display_name":"Lei Zhao","orcid":"https://orcid.org/0000-0002-8838-164X"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Lei Zhao","raw_affiliation_strings":["Center for Future Media, University of Electronic Science and Technology of China, Chengdu, China","Sichuan Artificial Intelligence Research Institute, Yibin, China"],"affiliations":[{"raw_affiliation_string":"Center for Future Media, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]},{"raw_affiliation_string":"Sichuan Artificial Intelligence Research Institute, Yibin, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100712918","display_name":"Junlin Li","orcid":"https://orcid.org/0000-0002-1931-0530"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junlin Li","raw_affiliation_strings":["Center for Future Media, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"Center for Future Media, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066645546","display_name":"Lianli Gao","orcid":"https://orcid.org/0000-0002-2522-6394"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lianli Gao","raw_affiliation_strings":["Center for Future Media, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"Center for Future Media, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058197437","display_name":"Yunbo Rao","orcid":null},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunbo Rao","raw_affiliation_strings":["School of Information and Software Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"School of Information and Software Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036987388","display_name":"Jingkuan Song","orcid":"https://orcid.org/0000-0002-2549-8322"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingkuan Song","raw_affiliation_strings":["Center for Future Media, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"Center for Future Media, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5052993469","display_name":"Heng Tao Shen","orcid":"https://orcid.org/0000-0002-2999-2088"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Heng Tao Shen","raw_affiliation_strings":["Center for Future Media, University of Electronic Science and Technology of China, Chengdu, China","Sichuan Artificial Intelligence Research Institute, Yibin, China"],"affiliations":[{"raw_affiliation_string":"Center for Future Media, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]},{"raw_affiliation_string":"Sichuan Artificial Intelligence Research Institute, Yibin, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5101749819"],"corresponding_institution_ids":["https://openalex.org/I150229711"],"apc_list":null,"apc_paid":null,"fwci":2.0377,"has_fulltext":false,"cited_by_count":20,"citation_normalized_percentile":{"value":0.88186984,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":"33","issue":"2","first_page":"861","last_page":"871"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9943000078201294,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9926000237464905,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/dialog-box","display_name":"Dialog box","score":0.9200665950775146},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.852486252784729},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.6646971702575684},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.6500510573387146},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5921139121055603},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5485792756080627},{"id":"https://openalex.org/keywords/knowledge-graph","display_name":"Knowledge graph","score":0.5471850037574768},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.5073475241661072},{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.4742547571659088},{"id":"https://openalex.org/keywords/knowledge-base","display_name":"Knowledge base","score":0.4646010100841522},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.42046597599983215},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.15489411354064941},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.11354079842567444}],"concepts":[{"id":"https://openalex.org/C173853756","wikidata":"https://www.wikidata.org/wiki/Q86915","display_name":"Dialog box","level":2,"score":0.9200665950775146},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.852486252784729},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.6646971702575684},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.6500510573387146},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5921139121055603},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5485792756080627},{"id":"https://openalex.org/C2987255567","wikidata":"https://www.wikidata.org/wiki/Q33002955","display_name":"Knowledge graph","level":2,"score":0.5471850037574768},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.5073475241661072},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.4742547571659088},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.4646010100841522},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.42046597599983215},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.15489411354064941},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.11354079842567444},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2022.3207228","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2022.3207228","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G899288709","display_name":null,"funder_award_id":"G2022036009L","funder_id":"https://openalex.org/F4320321540","funder_display_name":"Ministry of Science and Technology of the People's Republic of China"}],"funders":[{"id":"https://openalex.org/F4320321540","display_name":"Ministry of Science and Technology of the People's Republic of China","ror":"https://ror.org/027s68j25"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W2250539671","https://openalex.org/W2277195237","https://openalex.org/W2481240925","https://openalex.org/W2890531016","https://openalex.org/W2892245540","https://openalex.org/W2904452845","https://openalex.org/W2961553857","https://openalex.org/W2962861647","https://openalex.org/W2963101956","https://openalex.org/W2963536419","https://openalex.org/W2963758027","https://openalex.org/W2964120214","https://openalex.org/W2964218959","https://openalex.org/W2967045987","https://openalex.org/W2970355596","https://openalex.org/W2981582341","https://openalex.org/W2981902456","https://openalex.org/W2997547717","https://openalex.org/W3004349648","https://openalex.org/W3010715592","https://openalex.org/W3014545861","https://openalex.org/W3023511145","https://openalex.org/W3027560576","https://openalex.org/W3034291519","https://openalex.org/W3093017735","https://openalex.org/W3095309002","https://openalex.org/W3099143471","https://openalex.org/W3107092117","https://openalex.org/W3117854388","https://openalex.org/W3127165192","https://openalex.org/W3134272453","https://openalex.org/W3136792391","https://openalex.org/W3152635971","https://openalex.org/W3166513553","https://openalex.org/W3174865181","https://openalex.org/W3175445769","https://openalex.org/W3196936439","https://openalex.org/W3207659901","https://openalex.org/W4207035468","https://openalex.org/W4221148458","https://openalex.org/W4226017838","https://openalex.org/W4249013746","https://openalex.org/W6676647902","https://openalex.org/W6678470764","https://openalex.org/W6747225742","https://openalex.org/W6755207826"],"related_works":["https://openalex.org/W2098987383","https://openalex.org/W2795961259","https://openalex.org/W2117933979","https://openalex.org/W4298396513","https://openalex.org/W2283130723","https://openalex.org/W103938586","https://openalex.org/W2104718772","https://openalex.org/W2368721880","https://openalex.org/W1628562230","https://openalex.org/W4386298164"],"abstract_inverted_index":{"Visual":[0],"dialog":[1,13,59,105],"requires":[2],"an":[3,10],"agent":[4],"to":[5,80],"answer":[6],"successive":[7],"questions":[8,34,121],"considering":[9],"image":[11,100],"and":[12,35,37,77,122,151,171,182],"history,":[14],"which":[15,72,132],"is":[16,90,127],"a":[17,66],"classic":[18],"vision-language":[19],"task.":[20],"Despite":[21],"progress,":[22],"there":[23],"are":[24,96],"still":[25],"two":[26,143,166],"key":[27],"challenges:":[28],"1)":[29],"parsing":[30,117],"long":[31,118],"or":[32,119],"complex":[33,120],"answers":[36],"2)":[38],"dealing":[39],"with":[40],"the":[41,53,82,86,93,99,103,139,154,159,174,177,187],"visual":[42,58,104,135,160],"scene":[43,130],"containing":[44],"complicated":[45,140],"interactions":[46],"among":[47],"entities.":[48],"These":[49,142],"challenges":[50],"bring":[51],"about":[52],"unsatisfactory":[54],"consequence":[55],"of":[56,102,145,158,176,189],"current":[57],"methods.":[60],"In":[61],"this":[62],"paper,":[63],"we":[64],"propose":[65],"novel":[67],"Heterogeneous":[68],"Knowledge":[69],"Network":[70],"(HKNet),":[71],"leverages":[73],"textual":[74,87,108],"sequence":[75,88,109],"knowledge":[76,79,89,110,126,147],"graph":[78,125],"address":[81],"above":[83],"issues.":[84],"Specifically,":[85],"derived":[91],"from":[92,98],"sentences":[94],"that":[95],"retrieved":[97],"captions":[101],"dataset.":[106],"The":[107,124],"can":[111],"supplement":[112],"essential":[113],"common":[114],"sense":[115],"for":[116,137],"answers.":[123],"constructed":[128],"via":[129],"graph,":[131],"provides":[133],"complete":[134],"relationships":[136],"understanding":[138],"interactions.":[141],"kinds":[144],"heterogeneous":[146],"complement":[148],"each":[149],"other":[150],"jointly":[152],"improve":[153],"logical":[155],"reasoning":[156],"ability":[157],"dialog.":[161],"Extensive":[162],"experimental":[163],"results":[164,184],"on":[165],"benchmark":[167],"datasets:":[168],"VisDial":[169],"v0.9":[170],"v1.0":[172],"demonstrate":[173],"superiority":[175],"proposed":[178],"HKNet.":[179],"Ablation":[180],"studies":[181],"visualization":[183],"further":[185],"verify":[186],"effectiveness":[188],"our":[190],"method.":[191]},"counts_by_year":[{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":9},{"year":2023,"cited_by_count":4}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
