{"id":"https://openalex.org/W4386065291","doi":"https://doi.org/10.1109/tcsvt.2023.3307554","title":"Enhanced Semantic Similarity Learning Framework for Image-Text Matching","display_name":"Enhanced Semantic Similarity Learning Framework for Image-Text Matching","publication_year":2023,"publication_date":"2023-08-22","ids":{"openalex":"https://openalex.org/W4386065291","doi":"https://doi.org/10.1109/tcsvt.2023.3307554"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2023.3307554","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2023.3307554","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100342380","display_name":"Kun Zhang","orcid":"https://orcid.org/0000-0003-2140-2546"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Kun Zhang","raw_affiliation_strings":["School of Information Science and Technology, University of Science and Technology of China, Anhui, Hefei, China"],"affiliations":[{"raw_affiliation_string":"School of Information Science and Technology, University of Science and Technology of China, Anhui, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081768707","display_name":"Bo Hu","orcid":"https://orcid.org/0000-0002-6540-5119"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo Hu","raw_affiliation_strings":["School of Information Science and Technology, University of Science and Technology of China, Anhui, Hefei, China"],"affiliations":[{"raw_affiliation_string":"School of Information Science and Technology, University of Science and Technology of China, Anhui, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072035828","display_name":"Huatian Zhang","orcid":"https://orcid.org/0000-0002-9967-1992"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huatian Zhang","raw_affiliation_strings":["School of Information Science and Technology, University of Science and Technology of China, Anhui, Hefei, China"],"affiliations":[{"raw_affiliation_string":"School of Information Science and Technology, University of Science and Technology of China, Anhui, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057987358","display_name":"Zhe Li","orcid":"https://orcid.org/0000-0003-4716-614X"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhe Li","raw_affiliation_strings":["School of Cyberspace Science and Technology, University of Science and Technology of China, Anhui, Hefei, China"],"affiliations":[{"raw_affiliation_string":"School of Cyberspace Science and Technology, University of Science and Technology of China, Anhui, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023341829","display_name":"Zhendong Mao","orcid":"https://orcid.org/0000-0001-5739-8126"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhendong Mao","raw_affiliation_strings":["School of Cyberspace Science and Technology, University of Science and Technology of China, Anhui, Hefei, China"],"affiliations":[{"raw_affiliation_string":"School of Cyberspace Science and Technology, University of Science and Technology of China, Anhui, Hefei, China","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100342380"],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":2.8887,"has_fulltext":false,"cited_by_count":24,"citation_normalized_percentile":{"value":0.92716288,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"34","issue":"4","first_page":"2973","last_page":"2988"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7108643651008606},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5701808333396912},{"id":"https://openalex.org/keywords/semantic-similarity","display_name":"Semantic similarity","score":0.5690188407897949},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.49238812923431396},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.4882340729236603},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.45099544525146484},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4306093454360962},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4276929199695587},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.4144766330718994},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.39192357659339905},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.17843225598335266}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7108643651008606},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5701808333396912},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.5690188407897949},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.49238812923431396},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.4882340729236603},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.45099544525146484},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4306093454360962},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4276929199695587},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.4144766330718994},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.39192357659339905},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.17843225598335266},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2023.3307554","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2023.3307554","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.7200000286102295}],"awards":[{"id":"https://openalex.org/G7854910236","display_name":null,"funder_award_id":"62222212","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":78,"referenced_works":["https://openalex.org/W1527575280","https://openalex.org/W1905882502","https://openalex.org/W1916445035","https://openalex.org/W1957706851","https://openalex.org/W2122950138","https://openalex.org/W2139945125","https://openalex.org/W2250539671","https://openalex.org/W2277195237","https://openalex.org/W2745461083","https://openalex.org/W2774267535","https://openalex.org/W2896457183","https://openalex.org/W2956018683","https://openalex.org/W2962964995","https://openalex.org/W2963258075","https://openalex.org/W2963389687","https://openalex.org/W2963467339","https://openalex.org/W2964120214","https://openalex.org/W2964727037","https://openalex.org/W2965848243","https://openalex.org/W2979304729","https://openalex.org/W2981165461","https://openalex.org/W2982078236","https://openalex.org/W2988089887","https://openalex.org/W2988823324","https://openalex.org/W2997403743","https://openalex.org/W2997525715","https://openalex.org/W2998215884","https://openalex.org/W3005971801","https://openalex.org/W3010277541","https://openalex.org/W3034882096","https://openalex.org/W3035212740","https://openalex.org/W3035454331","https://openalex.org/W3035605030","https://openalex.org/W3091588028","https://openalex.org/W3094172275","https://openalex.org/W3099614098","https://openalex.org/W3102629312","https://openalex.org/W3108373531","https://openalex.org/W3110042533","https://openalex.org/W3111984153","https://openalex.org/W3115819524","https://openalex.org/W3118694826","https://openalex.org/W3131251978","https://openalex.org/W3136792391","https://openalex.org/W3155230099","https://openalex.org/W3163971663","https://openalex.org/W3175888430","https://openalex.org/W3177343494","https://openalex.org/W3190055600","https://openalex.org/W4200051180","https://openalex.org/W4205219932","https://openalex.org/W4206314411","https://openalex.org/W4210894218","https://openalex.org/W4224952037","https://openalex.org/W4283805294","https://openalex.org/W4283812943","https://openalex.org/W4285118104","https://openalex.org/W4293795227","https://openalex.org/W4296437558","https://openalex.org/W4297813007","https://openalex.org/W4312761738","https://openalex.org/W4318718936","https://openalex.org/W4322576838","https://openalex.org/W4323338501","https://openalex.org/W4323797084","https://openalex.org/W4360897523","https://openalex.org/W4385245566","https://openalex.org/W4391147898","https://openalex.org/W6631516269","https://openalex.org/W6640257717","https://openalex.org/W6676647902","https://openalex.org/W6747225742","https://openalex.org/W6755207826","https://openalex.org/W6767983274","https://openalex.org/W6791353385","https://openalex.org/W6810334672","https://openalex.org/W6849177959","https://openalex.org/W6852776617"],"related_works":["https://openalex.org/W1972035260","https://openalex.org/W2375480909","https://openalex.org/W2353314428","https://openalex.org/W2012019886","https://openalex.org/W2388928357","https://openalex.org/W4301594054","https://openalex.org/W2114797768","https://openalex.org/W2380654781","https://openalex.org/W2176214140","https://openalex.org/W2516873349"],"abstract_inverted_index":{"Image-text":[0],"matching":[1],"is":[2],"a":[3,36,43,80,95],"fundamental":[4],"task":[5],"to":[6,35,49,53,99,137],"bridge":[7],"vision":[8],"and":[9,28,91,107,140,160],"language.":[10],"The":[11],"critical":[12],"challenge":[13],"lies":[14],"in":[15,172],"accurately":[16],"learning":[17,85,153],"the":[18,47,58,62,70,101,114,149,163],"semantic":[19,55,75,83,133,165],"similarity":[20,84,144],"between":[21,105],"these":[22],"two":[23],"heterogeneous":[24],"modalities.":[25],"For":[26],"visual":[27,106],"textual":[29,108],"features,":[30],"existing":[31],"methods":[32],"typically":[33],"default":[34],"static":[37],"dimensional":[38],"correspondence":[39,72,104,152],"mechanism,":[40,121],"i.e.,":[41],"using":[42],"single":[44],"dimension":[45],"as":[46],"measure-unit":[48],"perform":[50],"one-to-one":[51],"correspondence,":[52],"examine":[54,100],"similarity,":[56],"e.g.,":[57],"cosine/Euclidean":[59],"distance":[60],"or":[61],"weighted":[63],"similarity.":[64,166],"In":[65],"this":[66],"paper,":[67],"different":[68],"from":[69],"single-dimensional":[71],"with":[73,118,154],"limited":[74],"expressive":[76,135],"capability,":[77],"we":[78,111,147],"propose":[79],"novel":[81],"enhanced":[82,103,151],"(ESL),":[86],"which":[87,122,158],"generalizes":[88],"both":[89],"measure-units":[90,126],"their":[92],"correspondences":[93],"into":[94],"dynamic":[96],"learnable":[97],"framework":[98],"multi-dimensional":[102,116],"features.":[109],"Specifically,":[110],"first":[112],"devise":[113,148],"intra-modal":[115],"aggregators":[117],"iterative":[119],"enhancing":[120],"dynamically":[123],"captures":[124],"new":[125],"integrated":[127],"by":[128],"hierarchical":[129],"multi-dimensions,":[130],"producing":[131],"diverse":[132],"combinatorial":[134],"capabilities":[136],"provide":[138],"richer":[139],"discriminative":[141],"information":[142],"for":[143],"examination.":[145],"Then,":[146],"inter-modal":[150],"sparse":[155],"contribution":[156],"degrees,":[157],"comprehensively":[159],"efficiently":[161],"determines":[162],"cross-modal":[164],"Extensive":[167],"experiments":[168],"verify":[169],"its":[170],"superiority":[171],"achieving":[173],"state-of-the-art":[174],"performance.":[175],"Codes":[176],"will":[177],"be":[178],"released":[179],"at":[180],"<uri":[181],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[182],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">https://github.com/CrossmodalGroup/ESL</uri>":[183],".":[184]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":16},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
