{"id":"https://openalex.org/W4386072101","doi":"https://doi.org/10.1109/cvpr52729.2023.00261","title":"Teaching Structured Vision &amp; Language Concepts to Vision &amp; Language Models","display_name":"Teaching Structured Vision &amp; Language Concepts to Vision &amp; Language Models","publication_year":2023,"publication_date":"2023-06-01","ids":{"openalex":"https://openalex.org/W4386072101","doi":"https://doi.org/10.1109/cvpr52729.2023.00261"},"language":"en","primary_location":{"id":"doi:10.1109/cvpr52729.2023.00261","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr52729.2023.00261","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016502423","display_name":"Sivan Doveh","orcid":"https://orcid.org/0000-0003-2431-0620"},"institutions":[{"id":"https://openalex.org/I53964585","display_name":"Weizmann Institute of Science","ror":"https://ror.org/0316ej306","country_code":"IL","type":"education","lineage":["https://openalex.org/I53964585"]}],"countries":["IL"],"is_corresponding":true,"raw_author_name":"Sivan Doveh","raw_affiliation_strings":["IBM Research","Weizmann Institute of Science"],"affiliations":[{"raw_affiliation_string":"IBM Research","institution_ids":[]},{"raw_affiliation_string":"Weizmann Institute of Science","institution_ids":["https://openalex.org/I53964585"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036051790","display_name":"Assaf Arbelle","orcid":"https://orcid.org/0000-0001-6559-2316"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Assaf Arbelle","raw_affiliation_strings":["IBM Research"],"affiliations":[{"raw_affiliation_string":"IBM Research","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058406651","display_name":"Sivan Harary","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sivan Harary","raw_affiliation_strings":["IBM Research"],"affiliations":[{"raw_affiliation_string":"IBM Research","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054401063","display_name":"Eli Schwartz","orcid":"https://orcid.org/0000-0002-3393-8666"},"institutions":[{"id":"https://openalex.org/I16391192","display_name":"Tel Aviv University","ror":"https://ror.org/04mhzgx49","country_code":"IL","type":"education","lineage":["https://openalex.org/I16391192"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Eli Schwartz","raw_affiliation_strings":["IBM Research","Tel-Aviv University"],"affiliations":[{"raw_affiliation_string":"IBM Research","institution_ids":[]},{"raw_affiliation_string":"Tel-Aviv University","institution_ids":["https://openalex.org/I16391192"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066997569","display_name":"Roei Herzig","orcid":null},"institutions":[{"id":"https://openalex.org/I16391192","display_name":"Tel Aviv University","ror":"https://ror.org/04mhzgx49","country_code":"IL","type":"education","lineage":["https://openalex.org/I16391192"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Roei Herzig","raw_affiliation_strings":["IBM Research","Tel-Aviv University"],"affiliations":[{"raw_affiliation_string":"IBM Research","institution_ids":[]},{"raw_affiliation_string":"Tel-Aviv University","institution_ids":["https://openalex.org/I16391192"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072571599","display_name":"Raja Giryes","orcid":"https://orcid.org/0000-0002-2830-0297"},"institutions":[{"id":"https://openalex.org/I16391192","display_name":"Tel Aviv University","ror":"https://ror.org/04mhzgx49","country_code":"IL","type":"education","lineage":["https://openalex.org/I16391192"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Raja Giryes","raw_affiliation_strings":["Tel-Aviv University"],"affiliations":[{"raw_affiliation_string":"Tel-Aviv University","institution_ids":["https://openalex.org/I16391192"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052325109","display_name":"Rog\u00e9rio Feris","orcid":"https://orcid.org/0000-0001-6399-0679"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rogerio Feris","raw_affiliation_strings":["MIT-IBM Watson AI Lab"],"affiliations":[{"raw_affiliation_string":"MIT-IBM Watson AI Lab","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049734237","display_name":"Rameswar Panda","orcid":"https://orcid.org/0000-0003-4359-2475"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rameswar Panda","raw_affiliation_strings":["MIT-IBM Watson AI Lab"],"affiliations":[{"raw_affiliation_string":"MIT-IBM Watson AI Lab","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076637753","display_name":"Shimon Ullman","orcid":"https://orcid.org/0000-0003-4331-298X"},"institutions":[{"id":"https://openalex.org/I53964585","display_name":"Weizmann Institute of Science","ror":"https://ror.org/0316ej306","country_code":"IL","type":"education","lineage":["https://openalex.org/I53964585"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Shimon Ullman","raw_affiliation_strings":["Weizmann Institute of Science"],"affiliations":[{"raw_affiliation_string":"Weizmann Institute of Science","institution_ids":["https://openalex.org/I53964585"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5020676344","display_name":"Leonid Karlinsky","orcid":"https://orcid.org/0000-0003-2524-2068"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Leonid Karlinsky","raw_affiliation_strings":["MIT-IBM Watson AI Lab"],"affiliations":[{"raw_affiliation_string":"MIT-IBM Watson AI Lab","institution_ids":["https://openalex.org/I1341412227"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5016502423"],"corresponding_institution_ids":["https://openalex.org/I53964585"],"apc_list":null,"apc_paid":null,"fwci":5.0355,"has_fulltext":false,"cited_by_count":41,"citation_normalized_percentile":{"value":0.9666786,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"2657","last_page":"2668"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9866999983787537,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6949613094329834},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.557772159576416},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4583780765533447},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.34832102060317993}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6949613094329834},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.557772159576416},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4583780765533447},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.34832102060317993}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cvpr52729.2023.00261","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr52729.2023.00261","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.800000011920929,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G2012895916","display_name":null,"funder_award_id":"FA8750-19-C-1001","funder_id":"https://openalex.org/F4320332180","funder_display_name":"Defense Advanced Research Projects Agency"}],"funders":[{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":99,"referenced_works":["https://openalex.org/W146900863","https://openalex.org/W2117539524","https://openalex.org/W2170240176","https://openalex.org/W2251658415","https://openalex.org/W2277195237","https://openalex.org/W2473930607","https://openalex.org/W2560647685","https://openalex.org/W2579549467","https://openalex.org/W2743151379","https://openalex.org/W2765407302","https://openalex.org/W2795378892","https://openalex.org/W2805516822","https://openalex.org/W2806331055","https://openalex.org/W2884282566","https://openalex.org/W2886641317","https://openalex.org/W2896457183","https://openalex.org/W2896659472","https://openalex.org/W2938667732","https://openalex.org/W2951702519","https://openalex.org/W2952122856","https://openalex.org/W2955882737","https://openalex.org/W2962369866","https://openalex.org/W2963184176","https://openalex.org/W2964194231","https://openalex.org/W2968124245","https://openalex.org/W2970231061","https://openalex.org/W2971296908","https://openalex.org/W2978329087","https://openalex.org/W2987504399","https://openalex.org/W2992308087","https://openalex.org/W2997786945","https://openalex.org/W3000279895","https://openalex.org/W3001197829","https://openalex.org/W3009131631","https://openalex.org/W3010293452","https://openalex.org/W3034257141","https://openalex.org/W3034679267","https://openalex.org/W3035682985","https://openalex.org/W3090449556","https://openalex.org/W3091588028","https://openalex.org/W3095753865","https://openalex.org/W3096682293","https://openalex.org/W3097619042","https://openalex.org/W3106759358","https://openalex.org/W3108512475","https://openalex.org/W3115894062","https://openalex.org/W3118608800","https://openalex.org/W3168867926","https://openalex.org/W3172112830","https://openalex.org/W3184735396","https://openalex.org/W3205071530","https://openalex.org/W3206930349","https://openalex.org/W3207750165","https://openalex.org/W3209532394","https://openalex.org/W3212456749","https://openalex.org/W4214482673","https://openalex.org/W4225307291","https://openalex.org/W4226182655","https://openalex.org/W4281930370","https://openalex.org/W4282935537","https://openalex.org/W4283075937","https://openalex.org/W4283821388","https://openalex.org/W4286588500","https://openalex.org/W4286897344","https://openalex.org/W4287658071","https://openalex.org/W4295727797","https://openalex.org/W4308014717","https://openalex.org/W4312261477","https://openalex.org/W4312877428","https://openalex.org/W4312884055","https://openalex.org/W4320516905","https://openalex.org/W4385574293","https://openalex.org/W6685053522","https://openalex.org/W6748508754","https://openalex.org/W6751795773","https://openalex.org/W6751796012","https://openalex.org/W6755207826","https://openalex.org/W6761634466","https://openalex.org/W6765939562","https://openalex.org/W6767279747","https://openalex.org/W6773005947","https://openalex.org/W6774314701","https://openalex.org/W6774569510","https://openalex.org/W6780196292","https://openalex.org/W6780803138","https://openalex.org/W6783983736","https://openalex.org/W6789753369","https://openalex.org/W6790019176","https://openalex.org/W6791353385","https://openalex.org/W6798805250","https://openalex.org/W6802987763","https://openalex.org/W6803728867","https://openalex.org/W6803953248","https://openalex.org/W6810039040","https://openalex.org/W6811013733","https://openalex.org/W6811102192","https://openalex.org/W6839021834","https://openalex.org/W6839196789","https://openalex.org/W6839415613"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Vision":[0,37],"and":[1,47,55,97,128,151,234],"Language":[2,39],"(":[3],"<tex":[4,68,109,123,160,188,192],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[5,69,110,124,161,189,193],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$VL$</tex>":[6,70,111,125,162,190,194],")":[7],"models":[8,71,195,236],"have":[9,62],"demonstrated":[10],"remarkable":[11],"zero-shot":[12,220],"performance":[13],"in":[14,52,57,158,209,218],"a":[15,28,102,202,215,229],"variety":[16],"of":[17,22,35,78,114,121,138,185,205],"tasks.":[18],"However,":[19],"some":[20],"aspects":[21],"complex":[23],"language":[24,145,173],"understanding":[25,113,137,175,212],"still":[26,141],"remain":[27],"challenge.":[29],"We":[30],"introduce":[31],"the":[32,53,58,66,182,198],"collective":[33],"notion":[34],"Structured":[36],"&":[38],"Concepts":[40],"(SVLC)":[41],"which":[42,49],"includes":[43],"object":[44],"attributes,":[45],"relations,":[46],"states":[48],"are":[50,237],"present":[51],"text":[54],"visible":[56],"image.":[59],"Recent":[60],"studies":[61],"shown":[63],"that":[64,116,176],"even":[65],"best":[67],"struggle":[72],"with":[73,197,213],"SVLC.":[74],"A":[75],"possible":[76],"way":[77],"fixing":[79],"this":[80,93,165],"issue":[81],"is":[82,147],"by":[83],"collecting":[84],"dedicated":[85],"datasets":[86,127],"for":[87,107,154],"teaching":[88,159],"each":[89],"SVLC":[90,211],"type,":[91],"yet":[92],"might":[94],"be":[95,178],"expensive":[96],"time-consuming.":[98],"Instead,":[99],"we":[100,167],"propose":[101,168],"more":[103,118],"elegant":[104],"data-driven":[105],"approach":[106],"enhancing":[108],"models'":[112],"SVLCs":[115],"makes":[117],"effective":[119,156],"use":[120],"existing":[122],"pre-training":[126],"does":[129],"not":[130],"require":[131],"any":[132],"additional":[133],"data.":[134],"While":[135],"automatic":[136],"image":[139],"structure":[140,146,174],"remains":[142],"largely":[143],"unsolved,":[144],"much":[148],"better":[149],"modeled":[150],"understood,":[152],"allowing":[153],"its":[155],"utilization":[157],"models.":[163],"In":[164],"paper,":[166],"various":[169],"techniques":[170],"based":[171],"on":[172],"can":[177],"used":[179],"to":[180,207],"manipulate":[181],"textual":[183],"part":[184],"off-the-shelf":[186],"paired":[187],"datasets.":[191],"trained":[196],"updated":[199],"data":[200],"exhibit":[201],"significant":[203],"improvement":[204],"up":[206],"15%":[208],"their":[210,219],"only":[214],"mild":[216],"degradation":[217],"capabilities":[221],"both":[222],"when":[223],"training":[224],"from":[225],"scratch":[226],"or":[227],"fine-tuning":[228],"pre-trained":[230],"model.":[231],"Our":[232],"code":[233],"pretrained":[235],"available":[238],"at:":[239],"https://github.com/SivanDoveh/TSVLC":[240]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":16},{"year":2024,"cited_by_count":22},{"year":2023,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
