{"id":"https://openalex.org/W4210894218","doi":"https://doi.org/10.1109/tpami.2022.3148470","title":"Image-Text Embedding Learning via Visual and Textual Semantic Reasoning","display_name":"Image-Text Embedding Learning via Visual and Textual Semantic Reasoning","publication_year":2022,"publication_date":"2022-02-07","ids":{"openalex":"https://openalex.org/W4210894218","doi":"https://doi.org/10.1109/tpami.2022.3148470","pmid":"https://pubmed.ncbi.nlm.nih.gov/35130144"},"language":"en","primary_location":{"id":"doi:10.1109/tpami.2022.3148470","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2022.3148470","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100654289","display_name":"Kunpeng Li","orcid":"https://orcid.org/0000-0001-5805-793X"},"institutions":[{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Kunpeng Li","raw_affiliation_strings":["Facebook Reality Labs, Burlingame, CA, USA","NEC Laboratories America Inc., Princeton, NJ, USA"],"affiliations":[{"raw_affiliation_string":"Facebook Reality Labs, Burlingame, CA, USA","institution_ids":["https://openalex.org/I4210114444"]},{"raw_affiliation_string":"NEC Laboratories America Inc., Princeton, NJ, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074865219","display_name":"Yulun Zhang","orcid":"https://orcid.org/0000-0002-2288-5079"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yulun Zhang","raw_affiliation_strings":["Computer Vision Lab, ETH Z&#x00FC;rich, Z&#x00FC;rich, Switzerland"],"affiliations":[{"raw_affiliation_string":"Computer Vision Lab, ETH Z&#x00FC;rich, Z&#x00FC;rich, Switzerland","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100399953","display_name":"Kai Li","orcid":"https://orcid.org/0000-0002-9027-0914"},"institutions":[{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kai Li","raw_affiliation_strings":["Facebook Reality Labs, Burlingame, CA, USA","NEC Laboratories America Inc., Princeton, NJ, USA"],"affiliations":[{"raw_affiliation_string":"Facebook Reality Labs, Burlingame, CA, USA","institution_ids":["https://openalex.org/I4210114444"]},{"raw_affiliation_string":"NEC Laboratories America Inc., Princeton, NJ, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019292176","display_name":"Yuanyuan Li","orcid":"https://orcid.org/0000-0002-2851-8528"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yuanyuan Li","raw_affiliation_strings":["Northeastern University, Boston, MA, USA"],"affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, MA, USA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005819096","display_name":"Yun Fu","orcid":"https://orcid.org/0000-0002-5098-2853"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yun Fu","raw_affiliation_strings":["Northeastern University, Boston, MA, USA"],"affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, MA, USA","institution_ids":["https://openalex.org/I12912129"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100654289"],"corresponding_institution_ids":["https://openalex.org/I4210114444"],"apc_list":null,"apc_paid":null,"fwci":13.5846,"has_fulltext":false,"cited_by_count":140,"citation_normalized_percentile":{"value":0.9935669,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":"45","issue":"1","first_page":"641","last_page":"656"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9912999868392944,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.795779824256897},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6884031295776367},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6266566514968872},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.5971736907958984},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.5337377786636353},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5184199810028076},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4954594373703003},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.47352099418640137},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.44392552971839905},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.4389956295490265},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.43401145935058594},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.412891685962677},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.33801034092903137},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3362308144569397}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.795779824256897},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6884031295776367},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6266566514968872},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.5971736907958984},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.5337377786636353},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5184199810028076},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4954594373703003},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.47352099418640137},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.44392552971839905},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.4389956295490265},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.43401145935058594},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.412891685962677},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33801034092903137},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3362308144569397},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tpami.2022.3148470","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpami.2022.3148470","pdf_url":null,"source":{"id":"https://openalex.org/S199944782","display_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","issn_l":"0162-8828","issn":["0162-8828","1939-3539","2160-9292"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Pattern Analysis and Machine Intelligence","raw_type":"journal-article"},{"id":"pmid:35130144","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/35130144","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on pattern analysis and machine intelligence","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.699999988079071,"id":"https://metadata.un.org/sdg/10"}],"awards":[{"id":"https://openalex.org/G2967648683","display_name":null,"funder_award_id":"1651902","funder_id":"https://openalex.org/F4320335353","funder_display_name":"National Science Foundation of Sri Lanka"},{"id":"https://openalex.org/G3571051887","display_name":null,"funder_award_id":"W911NF-17-1-0367","funder_id":"https://openalex.org/F4320338281","funder_display_name":"Army Research Office"}],"funders":[{"id":"https://openalex.org/F4320335353","display_name":"National Science Foundation of Sri Lanka","ror":"https://ror.org/010xaa060"},{"id":"https://openalex.org/F4320338281","display_name":"Army Research Office","ror":"https://ror.org/05epdh915"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":108,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1522301498","https://openalex.org/W1527575280","https://openalex.org/W1665115054","https://openalex.org/W1756422141","https://openalex.org/W1849277567","https://openalex.org/W1861492603","https://openalex.org/W1905882502","https://openalex.org/W1924770834","https://openalex.org/W1931639407","https://openalex.org/W1949478088","https://openalex.org/W2003447360","https://openalex.org/W2059917035","https://openalex.org/W2062183687","https://openalex.org/W2064675550","https://openalex.org/W2097073572","https://openalex.org/W2107019937","https://openalex.org/W2123024445","https://openalex.org/W2139501017","https://openalex.org/W2141461755","https://openalex.org/W2158131535","https://openalex.org/W2158937425","https://openalex.org/W2159243025","https://openalex.org/W2185175083","https://openalex.org/W2194775991","https://openalex.org/W2221625691","https://openalex.org/W2277195237","https://openalex.org/W2333091651","https://openalex.org/W2558460151","https://openalex.org/W2745461083","https://openalex.org/W2770804203","https://openalex.org/W2774267535","https://openalex.org/W2776207810","https://openalex.org/W2776638780","https://openalex.org/W2778100917","https://openalex.org/W2808877322","https://openalex.org/W2890531016","https://openalex.org/W2896457183","https://openalex.org/W2903529742","https://openalex.org/W2905169750","https://openalex.org/W2950096400","https://openalex.org/W2954400107","https://openalex.org/W2962851944","https://openalex.org/W2962858109","https://openalex.org/W2962884155","https://openalex.org/W2962964995","https://openalex.org/W2963040148","https://openalex.org/W2963091558","https://openalex.org/W2963342403","https://openalex.org/W2963448089","https://openalex.org/W2963467339","https://openalex.org/W2963606198","https://openalex.org/W2963907629","https://openalex.org/W2964015378","https://openalex.org/W2964094751","https://openalex.org/W2964120214","https://openalex.org/W2964727037","https://openalex.org/W2966715458","https://openalex.org/W2970927600","https://openalex.org/W2979304729","https://openalex.org/W2982073904","https://openalex.org/W2982078236","https://openalex.org/W2988823324","https://openalex.org/W2990798426","https://openalex.org/W2991633843","https://openalex.org/W2994818707","https://openalex.org/W2997525715","https://openalex.org/W2998356391","https://openalex.org/W3034275286","https://openalex.org/W3034727271","https://openalex.org/W3035212740","https://openalex.org/W3035424743","https://openalex.org/W3035454331","https://openalex.org/W3035552787","https://openalex.org/W3035588244","https://openalex.org/W3035688398","https://openalex.org/W3037865115","https://openalex.org/W3090449556","https://openalex.org/W3091588028","https://openalex.org/W3166304536","https://openalex.org/W4206471589","https://openalex.org/W4287900618","https://openalex.org/W4299522971","https://openalex.org/W4299801216","https://openalex.org/W6620707391","https://openalex.org/W6631190155","https://openalex.org/W6631516269","https://openalex.org/W6637106369","https://openalex.org/W6637805884","https://openalex.org/W6639102338","https://openalex.org/W6640212811","https://openalex.org/W6678470764","https://openalex.org/W6682962330","https://openalex.org/W6683512859","https://openalex.org/W6685133223","https://openalex.org/W6685183736","https://openalex.org/W6726873649","https://openalex.org/W6738893770","https://openalex.org/W6746798562","https://openalex.org/W6747225742","https://openalex.org/W6754725917","https://openalex.org/W6755207826","https://openalex.org/W6766904570","https://openalex.org/W6767194493","https://openalex.org/W6767362881","https://openalex.org/W6771376659","https://openalex.org/W6779473860","https://openalex.org/W6779579431"],"related_works":["https://openalex.org/W2965546495","https://openalex.org/W4389116644","https://openalex.org/W2153315159","https://openalex.org/W3103844505","https://openalex.org/W3208297503","https://openalex.org/W3119773509","https://openalex.org/W2889153461","https://openalex.org/W2964117661","https://openalex.org/W4388405611","https://openalex.org/W2619127353"],"abstract_inverted_index":{"As":[0],"a":[1,15,52,128,153],"bridge":[2],"between":[3,10,58,201],"language":[4],"and":[5,12,47,60,74,89,105,124,140,203,261],"vision":[6],"domains,":[7],"cross-modal":[8],"retrieval":[9],"images":[11,59,202],"texts":[13],"is":[14],"hot":[16],"research":[17],"topic":[18],"in":[19,34,131],"recent":[20,150,218,228],"years.":[21],"It":[22],"remains":[23],"challenging":[24],"because":[25],"the":[26,35,68,87,102,110,114,117,132,159,168,173,194,207,227,250],"current":[27],"image":[28],"representations":[29,108,120],"usually":[30],"lack":[31],"semantic":[32,69,95,125,180],"concepts":[33,126],"corresponding":[36,133],"sentence":[37],"captions.":[38,204],"To":[39],"address":[40],"this":[41],"issue,":[42],"we":[43,247],"introduce":[44],"an":[45],"intuitive":[46],"interpretable":[48],"model":[49,65],"to":[50,92,158,172,197,240],"learn":[51],"common":[53],"embedding":[54],"space":[55],"for":[56,109],"alignments":[57],"text":[61,134],"descriptions.":[62],"Specifically,":[63],"our":[64,146,161,182,268],"first":[66],"incorporates":[67],"relationship":[70,82],"information":[71,104],"into":[72],"visual":[73,119,179],"textual":[75],"features":[76],"by":[77,190],"performing":[78],"region":[79],"or":[80],"word":[81],"reasoning.":[83],"Then":[84],"it":[85],"utilizes":[86],"gate":[88],"memory":[90],"mechanism":[91],"perform":[93],"global":[94,252],"reasoning":[96],"on":[97,137,193,267],"these":[98],"relationship-enhanced":[99],"features,":[100],"select":[101],"discriminative":[103],"gradually":[106],"grow":[107],"whole":[111],"scene.":[112],"Through":[113],"alignment":[115],"learning,":[116],"learned":[118],"capture":[121],"key":[122],"objects":[123],"of":[127,225,230],"scene":[129],"as":[130],"caption.":[135],"Experiments":[136,205],"MS-COCO":[138],"[1]":[139],"Flickr30K":[141],"[2]":[142],"datasets":[143],"validate":[144,206],"that":[145,249],"method":[147],"surpasses":[148],"many":[149,217],"state-of-the-arts":[151],"with":[152,178,220],"clear":[154],"margin.":[155],"In":[156],"addition":[157],"effectiveness,":[160],"methods":[162,183,209,219],"are":[163,210],"also":[164],"very":[165,187,258],"efficient":[166,260],"at":[167],"inference":[169],"stage.":[170],"Thanks":[171],"effective":[174],"overall":[175],"representation":[176],"learning":[177],"reasoning,":[181],"can":[184,255],"already":[185],"achieve":[186,262],"strong":[188],"performance":[189,243,265],"only":[191],"relying":[192],"simple":[195,251],"inner-product":[196],"obtain":[198],"similarity":[199],"scores":[200],"proposed":[208],"more":[211],"than":[212,216],"30-75":[213],"times":[214],"faster":[215],"code":[221],"public":[222],"available.":[223],"Instead":[224],"following":[226],"trend":[229],"using":[231],"complex":[232],"local":[233],"matching":[234,253],"strategies":[235],"[3],":[236],"[4],":[237],"[5],":[238],"[6]":[239],"pursue":[241],"good":[242],"while":[244],"sacrificing":[245],"efficiency,":[246],"show":[248],"strategy":[254],"still":[256],"be":[257],"effective,":[259],"even":[263],"better":[264],"based":[266],"framework.":[269]},"counts_by_year":[{"year":2026,"cited_by_count":7},{"year":2025,"cited_by_count":47},{"year":2024,"cited_by_count":49},{"year":2023,"cited_by_count":31},{"year":2022,"cited_by_count":6}],"updated_date":"2026-04-02T15:55:50.835912","created_date":"2025-10-10T00:00:00"}
