{"id":"https://openalex.org/W2972062950","doi":"https://doi.org/10.1145/3357384.3357987","title":"Inferring Context from Pixels for Multimodal Image Classification","display_name":"Inferring Context from Pixels for Multimodal Image Classification","publication_year":2019,"publication_date":"2019-11-03","ids":{"openalex":"https://openalex.org/W2972062950","doi":"https://doi.org/10.1145/3357384.3357987","mag":"2972062950"},"language":"en","primary_location":{"id":"doi:10.1145/3357384.3357987","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3357384.3357987","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 28th ACM International Conference on Information and Knowledge Management","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082670242","display_name":"Manan Shah","orcid":"https://orcid.org/0000-0001-8865-6896"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]},{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Manan Shah","raw_affiliation_strings":["Stanford University &amp; Google Research, Stanford, CA, USA"],"affiliations":[{"raw_affiliation_string":"Stanford University &amp; Google Research, Stanford, CA, USA","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110385376","display_name":"Krishnamurthy Viswanathan","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Krishnamurthy Viswanathan","raw_affiliation_strings":["Google Research, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Google Research, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024393012","display_name":"Chun-Ta Lu","orcid":"https://orcid.org/0000-0001-8573-4975"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chun-Ta Lu","raw_affiliation_strings":["Google Research, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Google Research, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017633018","display_name":"Ariel Fuxman","orcid":"https://orcid.org/0009-0003-6760-997X"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ariel Fuxman","raw_affiliation_strings":["Google Research, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Google Research, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100680995","display_name":"Zhen Li","orcid":"https://orcid.org/0000-0001-5093-4221"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhen Li","raw_affiliation_strings":["Google Research, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Google Research, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005934225","display_name":"Aleksei Timofeev","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Aleksei Timofeev","raw_affiliation_strings":["Waymo, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Waymo, Mountain View, CA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101773594","display_name":"Chao Jia","orcid":"https://orcid.org/0000-0001-8706-0935"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chao Jia","raw_affiliation_strings":["Google Research, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Google Research, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100722237","display_name":"Chen Sun","orcid":"https://orcid.org/0000-0002-4142-4008"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chen Sun","raw_affiliation_strings":["Google Research, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Google Research, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5082670242"],"corresponding_institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I97018004"],"apc_list":null,"apc_paid":null,"fwci":0.6073,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.72488978,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"189","last_page":"198"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9927999973297119,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pixel","display_name":"Pixel","score":0.7504581212997437},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6971268653869629},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6671671271324158},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6164721250534058},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5284718871116638},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.48805153369903564},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.42950835824012756},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.06844642758369446}],"concepts":[{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.7504581212997437},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6971268653869629},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6671671271324158},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6164721250534058},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5284718871116638},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.48805153369903564},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.42950835824012756},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.06844642758369446},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3357384.3357987","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3357384.3357987","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 28th ACM International Conference on Information and Knowledge Management","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.5899999737739563,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":46,"referenced_works":["https://openalex.org/W1499991161","https://openalex.org/W1514535095","https://openalex.org/W1544092585","https://openalex.org/W1610356397","https://openalex.org/W1686810756","https://openalex.org/W1852777783","https://openalex.org/W1964763677","https://openalex.org/W1981613567","https://openalex.org/W2081613070","https://openalex.org/W2097117768","https://openalex.org/W2124219775","https://openalex.org/W2125263373","https://openalex.org/W2150295085","https://openalex.org/W2163605009","https://openalex.org/W2170738476","https://openalex.org/W2194775991","https://openalex.org/W2412782625","https://openalex.org/W2413794162","https://openalex.org/W2527508530","https://openalex.org/W2557728737","https://openalex.org/W2583200575","https://openalex.org/W2601450892","https://openalex.org/W2612867916","https://openalex.org/W2743200750","https://openalex.org/W2753634799","https://openalex.org/W2788522804","https://openalex.org/W2899505139","https://openalex.org/W2901894078","https://openalex.org/W2949197413","https://openalex.org/W2949605076","https://openalex.org/W2950344723","https://openalex.org/W2951538594","https://openalex.org/W2951583185","https://openalex.org/W2951638509","https://openalex.org/W2952020226","https://openalex.org/W2953022248","https://openalex.org/W2953106684","https://openalex.org/W2962714319","https://openalex.org/W2962843773","https://openalex.org/W2963081269","https://openalex.org/W2963173190","https://openalex.org/W2963499153","https://openalex.org/W2963703197","https://openalex.org/W2963717374","https://openalex.org/W2963821229","https://openalex.org/W3103511443"],"related_works":["https://openalex.org/W2085033728","https://openalex.org/W4285411112","https://openalex.org/W2171299904","https://openalex.org/W1647606319","https://openalex.org/W4390494008","https://openalex.org/W2922442631","https://openalex.org/W2053596378","https://openalex.org/W2168523118","https://openalex.org/W2073639911","https://openalex.org/W2043988397"],"abstract_inverted_index":{"Image":[0],"classification":[1,180],"models":[2,45,88],"take":[3],"image":[4,109,133,158],"pixels":[5,62,110,134,196],"as":[6],"input":[7],"and":[8,41,115,128,159,181,197],"predict":[9,74],"labels":[10,137],"in":[11,34,65,138,200,214],"a":[12,95,104,112,117,161],"predefined":[13],"taxonomy.":[14,141],"While":[15],"contextual":[16,113,153],"information":[17,76,154],"(e.g.":[18],"text":[19,40,85],"surrounding":[20],"an":[21],"image)":[22],"can":[23,80],"provide":[24],"valuable":[25],"orthogonal":[26],"signals":[27],"to":[28,86,111,135,151,207,239],"improve":[29,90],"classification,":[30],"the":[31,37,58,66,83,125,132,139,174,224,228,245],"typical":[32],"setting":[33,59],"literature":[35],"assumes":[36],"unavailability":[38],"of":[39,99,217,227,231],"thus":[42],"focuses":[43],"on":[44,49,57,169,195,244],"that":[46,71,89,97,107,120,209],"rely":[47],"purely":[48],"pixels.":[50],"In":[51],"this":[52],"work,":[53],"we":[54,69,73,79],"also":[55],"focus":[56],"where":[60],"only":[61],"are":[63],"available":[64],"input.":[67],"However,":[68],"demonstrate":[70,208,223],"if":[72],"textual":[75,122],"from":[77,124,131],"pixels,":[78],"subsequently":[81],"use":[82],"predicted":[84],"train":[87],"overall":[91],"performance.":[92],"We":[93,165,203,221],"propose":[94],"framework":[96,168,211,233],"consists":[98],"two":[100],"main":[101],"components:":[102],"(1)":[103],"phrase":[105,126,143],"generator":[106,127,144],"maps":[108],"phrase,":[114],"(2)":[116],"multimodal":[118,229],"model":[119],"uses":[121],"features":[123,130],"visual":[129],"produce":[136],"output":[140,163],"The":[142],"is":[145],"trained":[146],"using":[147],"web-based":[148],"query-image":[149],"pairs":[150],"incorporate":[152],"associated":[155],"with":[156],"each":[157],"has":[160],"large":[162],"space.":[164],"evaluate":[166],"our":[167,210,232],"diverse":[170],"benchmark":[171],"datasets":[172],"(specifically,":[173],"WebVision":[175],"dataset":[176,183],"for":[177,184],"evaluating":[178,185],"multi-class":[179],"OpenImages":[182],"multi-label":[186],"classification),":[187],"demonstrating":[188],"performance":[189],"improvements":[190,213],"over":[191],"approaches":[192],"based":[193],"exclusively":[194],"showcasing":[198],"benefits":[199,226],"prediction":[201],"interpretability.":[202],"additionally":[204],"present":[205],"results":[206],"provides":[212],"few-shot":[215],"learning":[216,243],"minimally":[218],"labeled":[219],"concepts.":[220],"further":[222],"unique":[225],"nature":[230],"by":[234],"utilizing":[235],"intermediate":[236],"image/text":[237],"co-embeddings":[238],"perform":[240],"baseline":[241],"zero-shot":[242],"ImageNet":[246],"dataset.":[247]},"counts_by_year":[{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
