{"id":"https://openalex.org/W3087516599","doi":"https://doi.org/10.1109/tcyb.2020.3015084","title":"Unsupervised Visual\u2013Textual Correlation Learning With Fine-Grained Semantic Alignment","display_name":"Unsupervised Visual\u2013Textual Correlation Learning With Fine-Grained Semantic Alignment","publication_year":2020,"publication_date":"2020-09-15","ids":{"openalex":"https://openalex.org/W3087516599","doi":"https://doi.org/10.1109/tcyb.2020.3015084","mag":"3087516599","pmid":"https://pubmed.ncbi.nlm.nih.gov/32931438"},"language":"en","primary_location":{"id":"doi:10.1109/tcyb.2020.3015084","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcyb.2020.3015084","pdf_url":null,"source":{"id":"https://openalex.org/S4210191041","display_name":"IEEE Transactions on Cybernetics","issn_l":"2168-2267","issn":["2168-2267","2168-2275"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cybernetics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047811387","display_name":"Yuxin Peng","orcid":"https://orcid.org/0000-0001-7658-3845"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yuxin Peng","raw_affiliation_strings":["Wangxuan Institute of Computer Technology, Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Wangxuan Institute of Computer Technology, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039035501","display_name":"Zhaoda Ye","orcid":"https://orcid.org/0000-0002-8458-4140"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhaoda Ye","raw_affiliation_strings":["Wangxuan Institute of Computer Technology, Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Wangxuan Institute of Computer Technology, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026142528","display_name":"Jinwei Qi","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinwei Qi","raw_affiliation_strings":["Wangxuan Institute of Computer Technology, Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Wangxuan Institute of Computer Technology, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5021725678","display_name":"Yunkan Zhuo","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunkan Zhuo","raw_affiliation_strings":["Wangxuan Institute of Computer Technology, Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Wangxuan Institute of Computer Technology, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5047811387"],"corresponding_institution_ids":["https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":0.8839,"has_fulltext":false,"cited_by_count":15,"citation_normalized_percentile":{"value":0.76728095,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"52","issue":"5","first_page":"3669","last_page":"3683"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8065298795700073},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6800267696380615},{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.6643074750900269},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6258789896965027},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.617817759513855},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.5462150573730469},{"id":"https://openalex.org/keywords/semantic-similarity","display_name":"Semantic similarity","score":0.4841369092464447},{"id":"https://openalex.org/keywords/semantic-gap","display_name":"Semantic gap","score":0.48182907700538635},{"id":"https://openalex.org/keywords/correlation","display_name":"Correlation","score":0.4680711030960083},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.4607814848423004},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.45886725187301636},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4578271508216858},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.44085052609443665},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.42099708318710327},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.42032840847969055},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.41730695962905884},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.19114139676094055}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8065298795700073},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6800267696380615},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.6643074750900269},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6258789896965027},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.617817759513855},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.5462150573730469},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.4841369092464447},{"id":"https://openalex.org/C86034646","wikidata":"https://www.wikidata.org/wiki/Q474311","display_name":"Semantic gap","level":4,"score":0.48182907700538635},{"id":"https://openalex.org/C117220453","wikidata":"https://www.wikidata.org/wiki/Q5172842","display_name":"Correlation","level":2,"score":0.4680711030960083},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.4607814848423004},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.45886725187301636},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4578271508216858},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.44085052609443665},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.42099708318710327},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.42032840847969055},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.41730695962905884},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.19114139676094055},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D001921","descriptor_name":"Brain","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D001921","descriptor_name":"Brain","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D001921","descriptor_name":"Brain","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D012660","descriptor_name":"Semantics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012660","descriptor_name":"Semantics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D012660","descriptor_name":"Semantics","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016571","descriptor_name":"Neural Networks, Computer","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true}],"locations_count":2,"locations":[{"id":"doi:10.1109/tcyb.2020.3015084","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcyb.2020.3015084","pdf_url":null,"source":{"id":"https://openalex.org/S4210191041","display_name":"IEEE Transactions on Cybernetics","issn_l":"2168-2267","issn":["2168-2267","2168-2275"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cybernetics","raw_type":"journal-article"},{"id":"pmid:32931438","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/32931438","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on cybernetics","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.6499999761581421,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G7427494650","display_name":null,"funder_award_id":"61925201","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8355516938","display_name":null,"funder_award_id":"61771025","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":46,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1123427201","https://openalex.org/W1523385540","https://openalex.org/W1902237438","https://openalex.org/W1930223417","https://openalex.org/W1964073652","https://openalex.org/W2015394094","https://openalex.org/W2022398331","https://openalex.org/W2053667957","https://openalex.org/W2070753207","https://openalex.org/W2099471712","https://openalex.org/W2100235303","https://openalex.org/W2106277773","https://openalex.org/W2112193096","https://openalex.org/W2157331557","https://openalex.org/W2185175083","https://openalex.org/W2194775991","https://openalex.org/W2210322478","https://openalex.org/W2250378130","https://openalex.org/W2277195237","https://openalex.org/W2295088417","https://openalex.org/W2316082076","https://openalex.org/W2326180695","https://openalex.org/W2342543219","https://openalex.org/W2414522539","https://openalex.org/W2563587296","https://openalex.org/W2574447816","https://openalex.org/W2605649771","https://openalex.org/W2606473278","https://openalex.org/W2606965845","https://openalex.org/W2739181657","https://openalex.org/W2745461083","https://openalex.org/W2765440071","https://openalex.org/W2765977864","https://openalex.org/W2896798564","https://openalex.org/W2962856082","https://openalex.org/W2962964995","https://openalex.org/W2963000732","https://openalex.org/W2963467339","https://openalex.org/W2964081303","https://openalex.org/W2964120214","https://openalex.org/W2964216321","https://openalex.org/W4237723258","https://openalex.org/W4299801216","https://openalex.org/W6631216910","https://openalex.org/W6732292492"],"related_works":["https://openalex.org/W2376984068","https://openalex.org/W2506386910","https://openalex.org/W2089969684","https://openalex.org/W2002918846","https://openalex.org/W2503910294","https://openalex.org/W2117928543","https://openalex.org/W2034539438","https://openalex.org/W2384506582","https://openalex.org/W2019897126","https://openalex.org/W2120663665"],"abstract_inverted_index":{"With":[0],"the":[1,8,17,28,41,54,59,67,130,146,161,199,209,223,231,239,243,254,277,296,301,341],"rapid":[2,14],"growth":[3],"of":[4,31,58,69,149,343],"multimedia":[5],"data":[6,35,71,136,233,251],"on":[7,66,330],"Internet,":[9],"there":[10],"has":[11],"been":[12],"a":[13,112],"rise":[15],"in":[16,137,156,234,323],"demand":[18],"for":[19,44,87,258],"visual-textual":[20,88,171,265,312,320],"cross-media":[21,42,192,273,321],"retrieval":[22,322],"between":[23,114,132,202,213,256],"images":[24,115,142,214,227,307],"and":[25,33,95,116,123,134,140,143,204,215,217,249,262,282,295,308,337],"sentences.":[26],"However,":[27],"heterogeneous":[29,200],"property":[30],"visual":[32,133,203,248,279],"textual":[34,135,205,250,283],"brings":[36],"huge":[37],"challenges":[38],"to":[39,98,110,119,176,197,222,228,252,271,310,318],"measure":[40,208],"similarity":[43],"retrieval.":[45],"Although":[46],"existing":[47],"methods":[48],"have":[49],"achieved":[50],"great":[51],"progress":[52],"with":[53,72,285],"strong":[55],"learning":[56,173,261],"ability":[57],"deep":[60],"neural":[61],"network,":[62],"they":[63],"rely":[64],"heavily":[65],"scale":[68],"training":[70,232],"manual":[73,181,328],"annotation,":[74,105],"that":[75],"is,":[76],"either":[77],"pairwise":[78,102],"image-sentence":[79],"annotation":[80,83,329],"or":[81,103],"category":[82,104],"as":[84,187,289,291],"supervised":[85],"information":[86,304],"correlation":[89,113,131,172,193,260,274],"learning,":[90],"which":[91,339],"are":[92,185,316],"extremely":[93],"labor":[94],"time":[96],"consuming":[97],"collect.":[99],"Without":[100],"any":[101,180,327],"it":[106],"is":[107,195,269],"highly":[108],"challenging":[109],"construct":[111,177],"sentences":[117,144,220,309],"due":[118],"their":[120],"inconsistent":[121],"distributions":[122],"representations.":[124],"But":[125],"people":[126],"can":[127,152,241,298],"naturally":[128],"understand":[129],"high-level":[138],"semantic,":[139],"those":[141],"containing":[145],"same":[147],"group":[148],"semantic":[150,210,244,267,303],"concepts":[151,224],"be":[153],"easily":[154],"matched":[155],"human":[157,163],"brain.":[158],"Inspired":[159],"by":[160,275],"above":[162],"cognitive":[164],"process,":[165],"this":[166],"article":[167],"proposes":[168],"an":[169,235],"unsupervised":[170,190,236,264,324],"(UVCL)":[174],"approach":[175,240],"correlations":[178],"without":[179,326],"annotation.":[182],"The":[183],"contributions":[184],"summarized":[186],"follows:":[188],"1)":[189],"semantic-guided":[191,292],"mining":[194],"proposed":[196,270,345],"bridge":[198],"gap":[201,255],"data.":[206],"We":[207],"matching":[211],"degree":[212],"sentences,":[216],"generate":[218],"descriptive":[219],"according":[221],"extracted":[225],"from":[226],"further":[229,259],"augment":[230],"manner.":[237],"Therefore,":[238],"exploit":[242],"knowledge":[245],"within":[246,305],"both":[247,306],"reduce":[253],"them":[257],"2)":[263],"fine-grained":[266,278,286,302],"alignment":[268],"learn":[272],"aligning":[276],"local":[280],"patches":[281],"keywords":[284],"soft":[287],"attention":[288],"well":[290],"hard":[293],"attention,":[294],"results":[297],"effectively":[299],"highlight":[300],"boost":[311],"alignment.":[313],"Extensive":[314],"experiments":[315],"conducted":[317],"perform":[319],"setting":[325],"two":[331],"widely":[332],"used":[333],"datasets,":[334],"namely,":[335],"Flickr-30K":[336],"MS-COCO,":[338],"verify":[340],"effectiveness":[342],"our":[344],"UVCL":[346],"approach.":[347]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":3}],"updated_date":"2026-03-17T09:09:15.849793","created_date":"2025-10-10T00:00:00"}
