{"id":"https://openalex.org/W4312680532","doi":"https://doi.org/10.1109/cvpr52688.2022.01516","title":"Unsupervised Vision-Language Parsing: Seamlessly Bridging Visual Scene Graphs with Language Structures via Dependency Relationships","display_name":"Unsupervised Vision-Language Parsing: Seamlessly Bridging Visual Scene Graphs with Language Structures via Dependency Relationships","publication_year":2022,"publication_date":"2022-06-01","ids":{"openalex":"https://openalex.org/W4312680532","doi":"https://doi.org/10.1109/cvpr52688.2022.01516"},"language":"en","primary_location":{"id":"doi:10.1109/cvpr52688.2022.01516","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr52688.2022.01516","pdf_url":null,"source":{"id":"https://openalex.org/S4363607701","display_name":"2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5091755280","display_name":"Chao Lou","orcid":"https://orcid.org/0000-0001-8105-6935"},"institutions":[{"id":"https://openalex.org/I30809798","display_name":"ShanghaiTech University","ror":"https://ror.org/030bhh786","country_code":"CN","type":"education","lineage":["https://openalex.org/I30809798"]},{"id":"https://openalex.org/I4392738278","display_name":"Beijing Institute for General Artificial Intelligence","ror":"https://ror.org/02kw1ws04","country_code":null,"type":"facility","lineage":["https://openalex.org/I4392738278"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Chao Lou","raw_affiliation_strings":["Beijing Institute for General Artificial Intelligence (BIGAI),Beijing,China","Beijing Institute for General Artificial Intelligence (BIGAI), Beijing, China","ShanghaiTech University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Beijing Institute for General Artificial Intelligence (BIGAI),Beijing,China","institution_ids":["https://openalex.org/I4210100255"]},{"raw_affiliation_string":"Beijing Institute for General Artificial Intelligence (BIGAI), Beijing, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4392738278"]},{"raw_affiliation_string":"ShanghaiTech University, Shanghai, China","institution_ids":["https://openalex.org/I30809798"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101808738","display_name":"Wenjuan Han","orcid":"https://orcid.org/0000-0002-2327-0842"},"institutions":[{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]},{"id":"https://openalex.org/I4392738278","display_name":"Beijing Institute for General Artificial Intelligence","ror":"https://ror.org/02kw1ws04","country_code":null,"type":"facility","lineage":["https://openalex.org/I4392738278"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenjuan Han","raw_affiliation_strings":["Beijing Institute for General Artificial Intelligence (BIGAI),Beijing,China","Beijing Institute for General Artificial Intelligence (BIGAI), Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing Institute for General Artificial Intelligence (BIGAI),Beijing,China","institution_ids":["https://openalex.org/I4210100255"]},{"raw_affiliation_string":"Beijing Institute for General Artificial Intelligence (BIGAI), Beijing, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4392738278"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101373524","display_name":"Yuhuan Lin","orcid":"https://orcid.org/0000-0002-5117-5893"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuhuan Lin","raw_affiliation_strings":["Tsinghua Unversity,Beijing,China","Tsinghua Unversity, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua Unversity,Beijing,China","institution_ids":["https://openalex.org/I99065089"]},{"raw_affiliation_string":"Tsinghua Unversity, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056521090","display_name":"Zilong Zheng","orcid":"https://orcid.org/0000-0003-1219-5151"},"institutions":[{"id":"https://openalex.org/I4392738278","display_name":"Beijing Institute for General Artificial Intelligence","ror":"https://ror.org/02kw1ws04","country_code":null,"type":"facility","lineage":["https://openalex.org/I4392738278"]},{"id":"https://openalex.org/I4210100255","display_name":"Beijing Academy of Artificial Intelligence","ror":"https://ror.org/016a74861","country_code":"CN","type":"other","lineage":["https://openalex.org/I4210100255"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zilong Zheng","raw_affiliation_strings":["Beijing Institute for General Artificial Intelligence (BIGAI),Beijing,China","Beijing Institute for General Artificial Intelligence (BIGAI), Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing Institute for General Artificial Intelligence (BIGAI),Beijing,China","institution_ids":["https://openalex.org/I4210100255"]},{"raw_affiliation_string":"Beijing Institute for General Artificial Intelligence (BIGAI), Beijing, China","institution_ids":["https://openalex.org/I4210100255","https://openalex.org/I4392738278"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5091755280"],"corresponding_institution_ids":["https://openalex.org/I30809798","https://openalex.org/I4210100255","https://openalex.org/I4392738278"],"apc_list":null,"apc_paid":null,"fwci":0.5393,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.75524027,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"15586","last_page":"15595"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9854000210762024,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8766870498657227},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.7048150897026062},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6969048976898193},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6852502822875977},{"id":"https://openalex.org/keywords/scene-graph","display_name":"Scene graph","score":0.6190807223320007},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.6154451966285706},{"id":"https://openalex.org/keywords/dependency-grammar","display_name":"Dependency grammar","score":0.6121563911437988},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.5409994125366211},{"id":"https://openalex.org/keywords/grammar","display_name":"Grammar","score":0.44472962617874146},{"id":"https://openalex.org/keywords/coreference","display_name":"Coreference","score":0.4337732195854187},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.4178696572780609},{"id":"https://openalex.org/keywords/dependency","display_name":"Dependency (UML)","score":0.41317951679229736},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.21773600578308105},{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering (computer graphics)","score":0.14677762985229492},{"id":"https://openalex.org/keywords/resolution","display_name":"Resolution (logic)","score":0.09561187028884888}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8766870498657227},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.7048150897026062},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6969048976898193},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6852502822875977},{"id":"https://openalex.org/C179372163","wikidata":"https://www.wikidata.org/wiki/Q1406181","display_name":"Scene graph","level":3,"score":0.6190807223320007},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.6154451966285706},{"id":"https://openalex.org/C164883195","wikidata":"https://www.wikidata.org/wiki/Q674834","display_name":"Dependency grammar","level":3,"score":0.6121563911437988},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.5409994125366211},{"id":"https://openalex.org/C26022165","wikidata":"https://www.wikidata.org/wiki/Q8091","display_name":"Grammar","level":2,"score":0.44472962617874146},{"id":"https://openalex.org/C28076734","wikidata":"https://www.wikidata.org/wiki/Q63087","display_name":"Coreference","level":3,"score":0.4337732195854187},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.4178696572780609},{"id":"https://openalex.org/C19768560","wikidata":"https://www.wikidata.org/wiki/Q320727","display_name":"Dependency (UML)","level":2,"score":0.41317951679229736},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.21773600578308105},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.14677762985229492},{"id":"https://openalex.org/C138268822","wikidata":"https://www.wikidata.org/wiki/Q1051925","display_name":"Resolution (logic)","level":2,"score":0.09561187028884888},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cvpr52688.2022.01516","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr52688.2022.01516","pdf_url":null,"source":{"id":"https://openalex.org/S4363607701","display_name":"2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.5899999737739563}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":51,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1660460062","https://openalex.org/W1861492603","https://openalex.org/W1895641373","https://openalex.org/W1987835821","https://openalex.org/W2040713190","https://openalex.org/W2077069816","https://openalex.org/W2086379197","https://openalex.org/W2088049833","https://openalex.org/W2119717200","https://openalex.org/W2124479173","https://openalex.org/W2137454801","https://openalex.org/W2189472871","https://openalex.org/W2277195237","https://openalex.org/W2565877108","https://openalex.org/W2566475580","https://openalex.org/W2744831323","https://openalex.org/W2808775391","https://openalex.org/W2886970679","https://openalex.org/W2932376173","https://openalex.org/W2949399644","https://openalex.org/W2949579048","https://openalex.org/W2950096400","https://openalex.org/W2963042258","https://openalex.org/W2963150697","https://openalex.org/W2963184176","https://openalex.org/W2963649796","https://openalex.org/W2964345792","https://openalex.org/W2967045987","https://openalex.org/W3034381157","https://openalex.org/W3034984754","https://openalex.org/W3035058125","https://openalex.org/W3092723116","https://openalex.org/W3098284381","https://openalex.org/W3100393531","https://openalex.org/W3100776995","https://openalex.org/W3106150792","https://openalex.org/W3113519452","https://openalex.org/W3117585461","https://openalex.org/W3125969836","https://openalex.org/W3139332675","https://openalex.org/W3171691518","https://openalex.org/W3174865181","https://openalex.org/W3178418424","https://openalex.org/W3201890160","https://openalex.org/W6637201659","https://openalex.org/W6639102338","https://openalex.org/W6753998590","https://openalex.org/W6761634535","https://openalex.org/W6779993203","https://openalex.org/W6788073916"],"related_works":["https://openalex.org/W2139373276","https://openalex.org/W2227889443","https://openalex.org/W1509033667","https://openalex.org/W4385749782","https://openalex.org/W3167631113","https://openalex.org/W2145164276","https://openalex.org/W2004630825","https://openalex.org/W2324061017","https://openalex.org/W2740479527","https://openalex.org/W2251252397"],"abstract_inverted_index":{"Understanding":[0],"realistic":[1],"visual":[2,15,29,82,172],"scene":[3,32,83],"images":[4],"together":[5],"with":[6],"language":[7,159],"descriptions":[8],"is":[9,78],"a":[10,45,60,68,102,138],"fundamental":[11],"task":[12,62],"towards":[13],"generic":[14],"understanding.":[16],"Previous":[17],"works":[18],"have":[19],"shown":[20],"compelling":[21],"comprehensive":[22],"results":[23],"by":[24,100,124,136],"building":[25,101],"hierarchical":[26],"structures":[27,122],"for":[28,145],"scenes":[30],"(e.g.,":[31,37],"graphs)":[33],"and":[34,85,162,174],"natural":[35],"languages":[36],"dependency":[38,87,175],"trees),":[39],"individually.":[40],"However,":[41],"how":[42],"to":[43,79,91,119,127],"construct":[44],"joint":[46,69],"vision-language":[47],"(VL)":[48],"structure":[49,71,180],"has":[50],"barely":[51],"been":[52],"investigated.":[53],"More":[54],"challenging":[55],"but":[56],"worthwhile,":[57],"we":[58,98,113,132],"introduce":[59],"new":[61,103],"that":[63],"targets":[64],"on":[65,154,177],"inducing":[66],"such":[67],"VL":[70,95,163,179],"in":[72],"an":[73,115],"unsupervised":[74],"manner.":[75],"Our":[76,149],"goal":[77],"bridge":[80],"the":[81,92,168],"graphs":[84],"linguistic":[86],"trees":[88],"seamlessly.":[89],"Due":[90],"lack":[93],"of":[94,170],"structural":[96],"data,":[97],"start":[99],"dataset":[104,135],"VLParse.":[105],"Rather":[106],"than":[107],"using":[108],"labor-intensive":[109],"labeling":[110],"from":[111],"scratch,":[112],"propose":[114],"automatic":[116],"alignment":[117],"procedure":[118],"produce":[120,128],"coarse":[121],"followed":[123],"human":[125],"refinement":[126],"high-quality":[129],"ones.":[130],"Moreover,":[131],"benchmark":[133],"our":[134],"proposing":[137],"contrastive":[139],"learning":[140],"(CL)-based":[141],"framework":[142],"VLGAE,":[143],"short":[144],"Vision-Language":[146],"Graph":[147],"Autoencoder.":[148],"model":[150],"obtains":[151],"superior":[152],"performance":[153],"two":[155],"derived":[156],"tasks,":[157],"i.e.,":[158],"grammar":[160],"induction":[161],"phrase":[164],"grounding.":[165],"Ablations":[166],"show":[167],"effectiveness":[169],"both":[171],"cues":[173],"relationships":[176],"fine-grained":[178],"construction.":[181]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
