{"id":"https://openalex.org/W1924121366","doi":"https://doi.org/10.1109/cvpr.2015.7298917","title":"Don't just listen, use your imagination: Leveraging visual common sense for non-visual tasks","display_name":"Don't just listen, use your imagination: Leveraging visual common sense for non-visual tasks","publication_year":2015,"publication_date":"2015-06-01","ids":{"openalex":"https://openalex.org/W1924121366","doi":"https://doi.org/10.1109/cvpr.2015.7298917","mag":"1924121366"},"language":"en","primary_location":{"id":"doi:10.1109/cvpr.2015.7298917","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr.2015.7298917","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100450510","display_name":"Xiao Lin","orcid":"https://orcid.org/0000-0001-8518-2293"},"institutions":[{"id":"https://openalex.org/I859038795","display_name":"Virginia Tech","ror":"https://ror.org/02smfhw86","country_code":"US","type":"education","lineage":["https://openalex.org/I859038795"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Xiao Lin","raw_affiliation_strings":["Virginia Tech","Virginia Tech, USA,"],"affiliations":[{"raw_affiliation_string":"Virginia Tech","institution_ids":["https://openalex.org/I859038795"]},{"raw_affiliation_string":"Virginia Tech, USA,","institution_ids":["https://openalex.org/I859038795"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5050342343","display_name":"Devi Parikh","orcid":null},"institutions":[{"id":"https://openalex.org/I859038795","display_name":"Virginia Tech","ror":"https://ror.org/02smfhw86","country_code":"US","type":"education","lineage":["https://openalex.org/I859038795"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Devi Parikh","raw_affiliation_strings":["Virginia Tech","Virginia Tech, USA,"],"affiliations":[{"raw_affiliation_string":"Virginia Tech","institution_ids":["https://openalex.org/I859038795"]},{"raw_affiliation_string":"Virginia Tech, USA,","institution_ids":["https://openalex.org/I859038795"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5100450510"],"corresponding_institution_ids":["https://openalex.org/I859038795"],"apc_list":null,"apc_paid":null,"fwci":4.9705,"has_fulltext":false,"cited_by_count":44,"citation_normalized_percentile":{"value":0.96832549,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"2984","last_page":"2993"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9919999837875366,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/common-sense","display_name":"Common sense","score":0.8229806423187256},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.7480688095092773},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7173867225646973},{"id":"https://openalex.org/keywords/commonsense-reasoning","display_name":"Commonsense reasoning","score":0.5357649922370911},{"id":"https://openalex.org/keywords/abstraction","display_name":"Abstraction","score":0.5305414795875549},{"id":"https://openalex.org/keywords/commonsense-knowledge","display_name":"Commonsense knowledge","score":0.49141180515289307},{"id":"https://openalex.org/keywords/blank","display_name":"Blank","score":0.4898545444011688},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48947274684906006},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.43261483311653137},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4301871061325073},{"id":"https://openalex.org/keywords/visual-reasoning","display_name":"Visual reasoning","score":0.42666754126548767},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.40281808376312256},{"id":"https://openalex.org/keywords/cognitive-science","display_name":"Cognitive science","score":0.32744818925857544},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.17680534720420837},{"id":"https://openalex.org/keywords/domain-knowledge","display_name":"Domain knowledge","score":0.15059176087379456},{"id":"https://openalex.org/keywords/epistemology","display_name":"Epistemology","score":0.12671977281570435},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.10139307379722595}],"concepts":[{"id":"https://openalex.org/C2779814899","wikidata":"https://www.wikidata.org/wiki/Q332880","display_name":"Common sense","level":2,"score":0.8229806423187256},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7480688095092773},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7173867225646973},{"id":"https://openalex.org/C193221554","wikidata":"https://www.wikidata.org/wiki/Q5153664","display_name":"Commonsense reasoning","level":2,"score":0.5357649922370911},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.5305414795875549},{"id":"https://openalex.org/C30542707","wikidata":"https://www.wikidata.org/wiki/Q1603203","display_name":"Commonsense knowledge","level":3,"score":0.49141180515289307},{"id":"https://openalex.org/C2778089247","wikidata":"https://www.wikidata.org/wiki/Q368951","display_name":"Blank","level":2,"score":0.4898545444011688},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48947274684906006},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.43261483311653137},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4301871061325073},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.42666754126548767},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.40281808376312256},{"id":"https://openalex.org/C188147891","wikidata":"https://www.wikidata.org/wiki/Q147638","display_name":"Cognitive science","level":1,"score":0.32744818925857544},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.17680534720420837},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.15059176087379456},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.12671977281570435},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.10139307379722595},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cvpr.2015.7298917","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr.2015.7298917","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6100000143051147,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306216","display_name":"Paul G. Allen Family Foundation","ror":"https://ror.org/01degd278"},{"id":"https://openalex.org/F4320310812","display_name":"Allen Foundation","ror":"https://ror.org/036jq7b41"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":76,"referenced_works":["https://openalex.org/W13682356","https://openalex.org/W46519926","https://openalex.org/W141352744","https://openalex.org/W1468923932","https://openalex.org/W1512387364","https://openalex.org/W1552847225","https://openalex.org/W1584193343","https://openalex.org/W1846689784","https://openalex.org/W1891689858","https://openalex.org/W1897761818","https://openalex.org/W1924121366","https://openalex.org/W1933502375","https://openalex.org/W1960578971","https://openalex.org/W1964763677","https://openalex.org/W1982185844","https://openalex.org/W1983927101","https://openalex.org/W1995628331","https://openalex.org/W1996418862","https://openalex.org/W1999818274","https://openalex.org/W2002658919","https://openalex.org/W2010625607","https://openalex.org/W2018299767","https://openalex.org/W2030358157","https://openalex.org/W2032165333","https://openalex.org/W2035430745","https://openalex.org/W2036196300","https://openalex.org/W2036242214","https://openalex.org/W2050482109","https://openalex.org/W2066134726","https://openalex.org/W2067816745","https://openalex.org/W2081580037","https://openalex.org/W2081613070","https://openalex.org/W2094728533","https://openalex.org/W2103163130","https://openalex.org/W2107901333","https://openalex.org/W2114996241","https://openalex.org/W2122865749","https://openalex.org/W2125436662","https://openalex.org/W2125436846","https://openalex.org/W2128856065","https://openalex.org/W2134270519","https://openalex.org/W2141364309","https://openalex.org/W2145276819","https://openalex.org/W2147414309","https://openalex.org/W2151498684","https://openalex.org/W2153579005","https://openalex.org/W2168356304","https://openalex.org/W2171278097","https://openalex.org/W2188538318","https://openalex.org/W2250861254","https://openalex.org/W2573403082","https://openalex.org/W2593172760","https://openalex.org/W2611071287","https://openalex.org/W2916626863","https://openalex.org/W2953049742","https://openalex.org/W3143107425","https://openalex.org/W3160040553","https://openalex.org/W4294170691","https://openalex.org/W4298392976","https://openalex.org/W6601941461","https://openalex.org/W6605733523","https://openalex.org/W6633136396","https://openalex.org/W6634846276","https://openalex.org/W6639086153","https://openalex.org/W6639622275","https://openalex.org/W6639694449","https://openalex.org/W6640282676","https://openalex.org/W6640379509","https://openalex.org/W6641085288","https://openalex.org/W6678890848","https://openalex.org/W6681613270","https://openalex.org/W6681853784","https://openalex.org/W6682086655","https://openalex.org/W6682691769","https://openalex.org/W6732742072","https://openalex.org/W6734033041"],"related_works":["https://openalex.org/W3035583586","https://openalex.org/W4313191056","https://openalex.org/W2151799802","https://openalex.org/W3021007069","https://openalex.org/W2196779496","https://openalex.org/W2012768296","https://openalex.org/W4389518870","https://openalex.org/W2981750021","https://openalex.org/W4302773889","https://openalex.org/W3146195548"],"abstract_inverted_index":{"Artificial":[0],"agents":[1],"today":[2],"can":[3,147],"answer":[4],"factual":[5],"questions.":[6,125],"But":[7,35],"they":[8],"fall":[9],"short":[10],"on":[11,28,141],"questions":[12],"that":[13,158],"require":[14],"common":[15,24,38,60,80,89],"sense":[16,25,39,61,81,90],"reasoning.":[17],"Perhaps":[18],"this":[19,75],"is":[20,41,62,71],"because":[21,45,58],"most":[22],"existing":[23],"databases":[26],"rely":[27],"text":[29],"to":[30,49,53,64,102,119,151],"learn":[31],"and":[32,56,97,109,164],"represent":[33],"knowledge.":[34],"much":[36],"of":[37],"knowledge":[40,82],"unwritten":[42],"-":[43,86,91],"partly":[44,57],"it":[46,70],"tends":[47],"not":[48,72],"be":[50],"interesting":[51],"enough":[52],"talk":[54],"about,":[55],"some":[59],"unnatural":[63],"articulate":[65],"in":[66,92,117,155],"text.":[67],"While":[68],"unwritten,":[69],"unseen.":[73],"In":[74],"paper":[76],"we":[77],"leverage":[78,110],"semantic":[79],"learned":[83],"from":[84,113],"images":[85],"i.e.":[87],"visual":[88,98,111,132],"two":[93],"textual":[94,120],"tasks:":[95],"fill-in-the-blank":[96],"paraphrasing.":[99],"We":[100,126],"propose":[101],"\u201cimagine\u201d":[103],"the":[104,107,114,128],"scene":[105],"behind":[106],"text,":[108],"cues":[112,121],"\u201cimagined\u201d":[115],"scenes":[116,129],"addition":[118],"while":[122],"answering":[123],"these":[124,142],"imagine":[127],"as":[130,149],"a":[131,137],"abstraction.":[133],"Our":[134,144,162],"approach":[135],"outperforms":[136],"strong":[138],"text-only":[139],"baseline":[140],"tasks.":[143],"proposed":[145],"tasks":[146,157],"serve":[148],"benchmarks":[150],"quantitatively":[152],"evaluate":[153],"progress":[154],"solving":[156],"go":[159],"\u201cbeyond":[160],"recognition\u201d.":[161],"code":[163],"datasets":[165],"are":[166],"publicly":[167],"available.":[168]},"counts_by_year":[{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":5},{"year":2018,"cited_by_count":5},{"year":2017,"cited_by_count":6},{"year":2016,"cited_by_count":9},{"year":2015,"cited_by_count":7}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
