{"id":"https://openalex.org/W2963722138","doi":"https://doi.org/10.1109/cvpr.2018.00757","title":"Visual Question Reasoning on General Dependency Tree","display_name":"Visual Question Reasoning on General Dependency Tree","publication_year":2018,"publication_date":"2018-06-01","ids":{"openalex":"https://openalex.org/W2963722138","doi":"https://doi.org/10.1109/cvpr.2018.00757","mag":"2963722138"},"language":"en","primary_location":{"id":"doi:10.1109/cvpr.2018.00757","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr.2018.00757","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE/CVF Conference on Computer Vision and Pattern Recognition","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5059253391","display_name":"Qingxing Cao","orcid":"https://orcid.org/0000-0001-7042-6726"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Qingxing Cao","raw_affiliation_strings":["School of Data and Computer Science, Sun Yat-sen University, China"],"affiliations":[{"raw_affiliation_string":"School of Data and Computer Science, Sun Yat-sen University, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047878798","display_name":"Xiaodan Liang","orcid":"https://orcid.org/0000-0003-3213-3062"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaodan Liang","raw_affiliation_strings":["School of Data and Computer Science, Sun Yat-sen University, China"],"affiliations":[{"raw_affiliation_string":"School of Data and Computer Science, Sun Yat-sen University, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107894922","display_name":"Bailin Li","orcid":null},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bailin Li","raw_affiliation_strings":["School of Data and Computer Science, Sun Yat-sen University, China"],"affiliations":[{"raw_affiliation_string":"School of Data and Computer Science, Sun Yat-sen University, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042965510","display_name":"Guanbin Li","orcid":"https://orcid.org/0000-0002-4805-0926"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guanbin Li","raw_affiliation_strings":["School of Data and Computer Science, Sun Yat-sen University, China"],"affiliations":[{"raw_affiliation_string":"School of Data and Computer Science, Sun Yat-sen University, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100412937","display_name":"Liang Lin","orcid":"https://orcid.org/0000-0003-2248-3755"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liang Lin","raw_affiliation_strings":["School of Data and Computer Science, Sun Yat-sen University, China"],"affiliations":[{"raw_affiliation_string":"School of Data and Computer Science, Sun Yat-sen University, China","institution_ids":["https://openalex.org/I157773358"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5059253391"],"corresponding_institution_ids":["https://openalex.org/I157773358"],"apc_list":null,"apc_paid":null,"fwci":4.90900634,"has_fulltext":false,"cited_by_count":42,"citation_normalized_percentile":{"value":0.94967209,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"7249","last_page":"7257"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7738893032073975},{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.6747251749038696},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6430270075798035},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.6357572674751282},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5867802500724792},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.5791653990745544},{"id":"https://openalex.org/keywords/dependency","display_name":"Dependency (UML)","score":0.5590232014656067},{"id":"https://openalex.org/keywords/reasoning-system","display_name":"Reasoning system","score":0.5476322770118713},{"id":"https://openalex.org/keywords/modular-design","display_name":"Modular design","score":0.5085185766220093},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.48549559712409973},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.48108789324760437},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.46387067437171936},{"id":"https://openalex.org/keywords/tree","display_name":"Tree (set theory)","score":0.463575154542923},{"id":"https://openalex.org/keywords/spatial-intelligence","display_name":"Spatial intelligence","score":0.46321019530296326},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.4540456533432007},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.44813260436058044},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.09647771716117859}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7738893032073975},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.6747251749038696},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6430270075798035},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.6357572674751282},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5867802500724792},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.5791653990745544},{"id":"https://openalex.org/C19768560","wikidata":"https://www.wikidata.org/wiki/Q320727","display_name":"Dependency (UML)","level":2,"score":0.5590232014656067},{"id":"https://openalex.org/C89288958","wikidata":"https://www.wikidata.org/wiki/Q7301504","display_name":"Reasoning system","level":2,"score":0.5476322770118713},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.5085185766220093},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.48549559712409973},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.48108789324760437},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.46387067437171936},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.463575154542923},{"id":"https://openalex.org/C155911833","wikidata":"https://www.wikidata.org/wiki/Q3817354","display_name":"Spatial intelligence","level":2,"score":0.46321019530296326},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.4540456533432007},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.44813260436058044},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.09647771716117859},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cvpr.2018.00757","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr.2018.00757","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE/CVF Conference on Computer Vision and Pattern Recognition","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4699999988079071,"display_name":"Decent work and economic growth","id":"https://metadata.un.org/sdg/8"},{"score":0.4399999976158142,"display_name":"No poverty","id":"https://metadata.un.org/sdg/1"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":51,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1861492603","https://openalex.org/W1933349210","https://openalex.org/W1962622193","https://openalex.org/W2131494463","https://openalex.org/W2131774270","https://openalex.org/W2142192571","https://openalex.org/W2194775991","https://openalex.org/W2250861254","https://openalex.org/W2293453011","https://openalex.org/W2340874616","https://openalex.org/W2463565445","https://openalex.org/W2471094925","https://openalex.org/W2532034655","https://openalex.org/W2560730294","https://openalex.org/W2561715562","https://openalex.org/W2600144439","https://openalex.org/W2725513608","https://openalex.org/W2735608653","https://openalex.org/W2745461083","https://openalex.org/W2949197413","https://openalex.org/W2950157285","https://openalex.org/W2962716332","https://openalex.org/W2962749469","https://openalex.org/W2962779575","https://openalex.org/W2962933067","https://openalex.org/W2963143606","https://openalex.org/W2963191264","https://openalex.org/W2963224792","https://openalex.org/W2963383024","https://openalex.org/W2963579811","https://openalex.org/W2963656855","https://openalex.org/W2963668159","https://openalex.org/W2963717374","https://openalex.org/W2963907629","https://openalex.org/W2963954913","https://openalex.org/W2964091467","https://openalex.org/W2964118342","https://openalex.org/W2964121744","https://openalex.org/W2964303913","https://openalex.org/W2964345214","https://openalex.org/W6631190155","https://openalex.org/W6679844565","https://openalex.org/W6697449767","https://openalex.org/W6703809471","https://openalex.org/W6718553084","https://openalex.org/W6719057275","https://openalex.org/W6720210739","https://openalex.org/W6728881024","https://openalex.org/W6738893770","https://openalex.org/W6743068658"],"related_works":["https://openalex.org/W3137117987","https://openalex.org/W1574950926","https://openalex.org/W2496096353","https://openalex.org/W2240909318","https://openalex.org/W1999714552","https://openalex.org/W4287215231","https://openalex.org/W123410143","https://openalex.org/W207131014","https://openalex.org/W2113245685","https://openalex.org/W2039143072"],"abstract_inverted_index":{"The":[0],"collaborative":[1,102],"reasoning":[2,52,72,89,204,209,242],"for":[3,13,73,115,138],"understanding":[4],"each":[5,116,139],"image-question":[6],"pair":[7],"is":[8,186],"very":[9,22],"critical":[10],"but":[11],"under-explored":[12],"an":[14,105,161,173,191],"interpretable":[15,192],"Visual":[16],"Question":[17],"Answering":[18],"(VQA)":[19],"system.":[20,243],"Although":[21],"recent":[23],"works":[24],"also":[25],"tried":[26],"the":[27,37,44,111,120,129,141,169,198,212,228,237],"explicit":[28],"compositional":[29],"processes":[30],"to":[31,49,55,68,109,127],"assemble":[32],"multiple":[33],"sub-tasks":[34],"embedded":[35],"in":[36,80,160,219],"questions,":[38],"their":[39],"models":[40],"heavily":[41],"rely":[42],"on":[43,62,224],"annotations":[45],"or":[46,59],"hand-crafted":[47],"rules":[48],"obtain":[50],"valid":[51],"layout,":[53],"leading":[54],"either":[56],"heavy":[57],"labor":[58],"poor":[60],"performance":[61],"composition":[63,125,166],"reasoning.":[64],"In":[65],"this":[66],"paper,":[67],"enable":[69],"global":[70,208],"context":[71],"better":[74],"aligning":[75],"image":[76,199],"and":[77,82,181,206,233],"language":[78],"domains":[79],"diverse":[81],"unrestricted":[83],"cases,":[84],"we":[85],"propose":[86],"a":[87,123,134,202,220],"novel":[88],"network":[90,98],"called":[91],"Adversarial":[92],"Composition":[93],"Modular":[94],"Network":[95],"(ACMN).":[96],"This":[97],"comprises":[99],"of":[100,149,156,172,176,189,215,230,240],"two":[101],"modules:":[103],"i)":[104],"adversarial":[106,142,162],"attention":[107,143,217],"module":[108,126,144,167],"exploit":[110],"local":[112],"visual":[113],"evidence":[114],"word":[117,151,158],"parsed":[118],"from":[119],"question;":[121],"ii)":[122],"residual":[124,165,182],"compose":[128],"previously":[130],"mined":[131],"evidence.":[132],"Given":[133],"dependency":[135],"parse":[136],"tree":[137],"question,":[140],"progressively":[145],"discovers":[146],"salient":[147],"regions":[148,155],"one":[150],"by":[152,210],"densely":[153],"combining":[154],"child":[157],"nodes":[159],"manner.":[163,222],"Then":[164],"merges":[168],"hidden":[170],"representations":[171],"arbitrary":[174],"number":[175],"children":[177],"through":[178],"sum":[179],"pooling":[180],"connection.":[183],"Our":[184],"ACMN":[185,232],"thus":[187],"capable":[188],"building":[190],"VQA":[193],"system":[194],"that":[195],"gradually":[196],"dives":[197],"cues":[200],"following":[201],"question-driven":[203],"route":[205],"makes":[207],"incorporating":[211],"learned":[213],"knowledge":[214],"all":[216],"modules":[218],"principled":[221],"Experiments":[223],"relational":[225],"datasets":[226],"demonstrate":[227],"superiority":[229],"our":[231,241],"visualization":[234],"results":[235],"show":[236],"explainable":[238],"capability":[239]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":9},{"year":2020,"cited_by_count":8},{"year":2019,"cited_by_count":13},{"year":2018,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
