{"id":"https://openalex.org/W4323338501","doi":"https://doi.org/10.1109/tcsvt.2023.3253548","title":"ESA: External Space Attention Aggregation for Image-Text Retrieval","display_name":"ESA: External Space Attention Aggregation for Image-Text Retrieval","publication_year":2023,"publication_date":"2023-03-06","ids":{"openalex":"https://openalex.org/W4323338501","doi":"https://doi.org/10.1109/tcsvt.2023.3253548"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2023.3253548","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2023.3253548","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100754391","display_name":"Hongguang Zhu","orcid":"https://orcid.org/0000-0002-1356-5153"},"institutions":[{"id":"https://openalex.org/I21193070","display_name":"Beijing Jiaotong University","ror":"https://ror.org/01yj56c84","country_code":"CN","type":"education","lineage":["https://openalex.org/I21193070"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hongguang Zhu","raw_affiliation_strings":["Institute of Information Science, Beijing Jiaotong University, Beijing, China","Beijing Key Laboratory of Advanced Information Science and Network Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Information Science, Beijing Jiaotong University, Beijing, China","institution_ids":["https://openalex.org/I21193070"]},{"raw_affiliation_string":"Beijing Key Laboratory of Advanced Information Science and Network Technology, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100673406","display_name":"Chunjie Zhang","orcid":"https://orcid.org/0000-0002-1161-8995"},"institutions":[{"id":"https://openalex.org/I21193070","display_name":"Beijing Jiaotong University","ror":"https://ror.org/01yj56c84","country_code":"CN","type":"education","lineage":["https://openalex.org/I21193070"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chunjie Zhang","raw_affiliation_strings":["Institute of Information Science, Beijing Jiaotong University, Beijing, China","Beijing Key Laboratory of Advanced Information Science and Network Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Information Science, Beijing Jiaotong University, Beijing, China","institution_ids":["https://openalex.org/I21193070"]},{"raw_affiliation_string":"Beijing Key Laboratory of Advanced Information Science and Network Technology, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087043856","display_name":"Yunchao Wei","orcid":"https://orcid.org/0000-0002-2812-8781"},"institutions":[{"id":"https://openalex.org/I21193070","display_name":"Beijing Jiaotong University","ror":"https://ror.org/01yj56c84","country_code":"CN","type":"education","lineage":["https://openalex.org/I21193070"]},{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunchao Wei","raw_affiliation_strings":["Institute of Information Science, Beijing Jiaotong University, Beijing, China","Beijing Key Laboratory of Advanced Information Science and Network Technology, Beijing, China","Peng Cheng Laboratory, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Institute of Information Science, Beijing Jiaotong University, Beijing, China","institution_ids":["https://openalex.org/I21193070"]},{"raw_affiliation_string":"Beijing Key Laboratory of Advanced Information Science and Network Technology, Beijing, China","institution_ids":[]},{"raw_affiliation_string":"Peng Cheng Laboratory, Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072091414","display_name":"Shujuan Huang","orcid":"https://orcid.org/0000-0001-6045-8652"},"institutions":[{"id":"https://openalex.org/I21193070","display_name":"Beijing Jiaotong University","ror":"https://ror.org/01yj56c84","country_code":"CN","type":"education","lineage":["https://openalex.org/I21193070"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shujuan Huang","raw_affiliation_strings":["Institute of Information Science, Beijing Jiaotong University, Beijing, China","Beijing Key Laboratory of Advanced Information Science and Network Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Information Science, Beijing Jiaotong University, Beijing, China","institution_ids":["https://openalex.org/I21193070"]},{"raw_affiliation_string":"Beijing Key Laboratory of Advanced Information Science and Network Technology, Beijing, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100362745","display_name":"Yao Zhao","orcid":"https://orcid.org/0000-0002-8581-9554"},"institutions":[{"id":"https://openalex.org/I21193070","display_name":"Beijing Jiaotong University","ror":"https://ror.org/01yj56c84","country_code":"CN","type":"education","lineage":["https://openalex.org/I21193070"]},{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yao Zhao","raw_affiliation_strings":["Institute of Information Science, Beijing Jiaotong University, Beijing, China","Peng Cheng Laboratory, Shenzhen, China","Beijing Key Laboratory of Advanced Information Science and Network Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Information Science, Beijing Jiaotong University, Beijing, China","institution_ids":["https://openalex.org/I21193070"]},{"raw_affiliation_string":"Peng Cheng Laboratory, Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]},{"raw_affiliation_string":"Beijing Key Laboratory of Advanced Information Science and Network Technology, Beijing, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100754391"],"corresponding_institution_ids":["https://openalex.org/I21193070"],"apc_list":null,"apc_paid":null,"fwci":5.1843,"has_fulltext":false,"cited_by_count":43,"citation_normalized_percentile":{"value":0.96740026,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":"33","issue":"10","first_page":"6131","last_page":"6143"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.842392086982727},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6695325970649719},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.49217668175697327},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4851754307746887},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4569123685359955},{"id":"https://openalex.org/keywords/feature-vector","display_name":"Feature vector","score":0.45672211050987244},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.434573769569397},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.42369601130485535},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.41439956426620483},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.4034569263458252},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.40287381410598755},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.327424019575119}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.842392086982727},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6695325970649719},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.49217668175697327},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4851754307746887},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4569123685359955},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.45672211050987244},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.434573769569397},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42369601130485535},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.41439956426620483},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.4034569263458252},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.40287381410598755},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.327424019575119},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2023.3253548","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2023.3253548","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3910816902","display_name":null,"funder_award_id":"2018AAA0102100","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G5043621091","display_name":null,"funder_award_id":"62072026","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G582803609","display_name":null,"funder_award_id":"JQ20022","funder_id":"https://openalex.org/F4320322919","funder_display_name":"Natural Science Foundation of Beijing Municipality"},{"id":"https://openalex.org/G7536822468","display_name":null,"funder_award_id":"U1936212","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G927149482","display_name":null,"funder_award_id":"K22RC00010","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322919","display_name":"Natural Science Foundation of Beijing Municipality","ror":null},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":81,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1527575280","https://openalex.org/W1686810756","https://openalex.org/W1861492603","https://openalex.org/W1905882502","https://openalex.org/W2086504823","https://openalex.org/W2109586012","https://openalex.org/W2123024445","https://openalex.org/W2157331557","https://openalex.org/W2170605888","https://openalex.org/W2185175083","https://openalex.org/W2187089797","https://openalex.org/W2194775991","https://openalex.org/W2250539671","https://openalex.org/W2550553598","https://openalex.org/W2560730294","https://openalex.org/W2606965845","https://openalex.org/W2745461083","https://openalex.org/W2774267535","https://openalex.org/W2842511635","https://openalex.org/W2886641317","https://openalex.org/W2896457183","https://openalex.org/W2956018683","https://openalex.org/W2962964995","https://openalex.org/W2963467339","https://openalex.org/W2963469388","https://openalex.org/W2963518342","https://openalex.org/W2964120214","https://openalex.org/W2964187781","https://openalex.org/W2966715458","https://openalex.org/W2981586349","https://openalex.org/W2987671777","https://openalex.org/W2988823324","https://openalex.org/W2997525715","https://openalex.org/W2998702515","https://openalex.org/W3035454331","https://openalex.org/W3035605030","https://openalex.org/W3090449556","https://openalex.org/W3091588028","https://openalex.org/W3092820619","https://openalex.org/W3094172275","https://openalex.org/W3110042533","https://openalex.org/W3118694826","https://openalex.org/W3126337491","https://openalex.org/W3130289102","https://openalex.org/W3131251978","https://openalex.org/W3133825286","https://openalex.org/W3155230099","https://openalex.org/W3166304536","https://openalex.org/W3166396011","https://openalex.org/W3168433561","https://openalex.org/W3171668871","https://openalex.org/W3175095612","https://openalex.org/W3175888430","https://openalex.org/W3182937942","https://openalex.org/W3189718596","https://openalex.org/W3206477365","https://openalex.org/W3213100861","https://openalex.org/W4210894218","https://openalex.org/W4213348104","https://openalex.org/W4226391640","https://openalex.org/W4283812943","https://openalex.org/W4285118104","https://openalex.org/W4297808394","https://openalex.org/W4385245566","https://openalex.org/W6620707391","https://openalex.org/W6631516269","https://openalex.org/W6637373629","https://openalex.org/W6639102338","https://openalex.org/W6666761814","https://openalex.org/W6676497082","https://openalex.org/W6678470764","https://openalex.org/W6684723771","https://openalex.org/W6739901393","https://openalex.org/W6747225742","https://openalex.org/W6755207826","https://openalex.org/W6766904570","https://openalex.org/W6781720285","https://openalex.org/W6790019176","https://openalex.org/W6790904380","https://openalex.org/W6791353385"],"related_works":["https://openalex.org/W2378211422","https://openalex.org/W2745001401","https://openalex.org/W4321353415","https://openalex.org/W2130974462","https://openalex.org/W972276598","https://openalex.org/W4246352526","https://openalex.org/W2028665553","https://openalex.org/W2086519370","https://openalex.org/W2087343574","https://openalex.org/W2121910908"],"abstract_inverted_index":{"Due":[0],"to":[1,23,47,65,86,90,114,138],"the":[2,43,62,70,75,78,99,106,133,140,147,156,176,189,194],"large":[3],"gap":[4],"between":[5],"vision":[6],"and":[7,11,93,145,168,182,233],"language":[8],"modalities,":[9],"effective":[10],"efficient":[12,102],"image-text":[13,31,208],"retrieval":[14,26,49,230],"is":[15],"still":[16],"an":[17],"unsolved":[18],"problem.":[19],"Recent":[20],"progress":[21],"devotes":[22],"unilaterally":[24],"pursuing":[25],"accuracy":[27],"by":[28],"either":[29],"entangled":[30],"interaction":[32],"or":[33],"large-scale":[34,55],"vision-language":[35,195],"pre-training":[36,196],"in":[37,69,217],"a":[38,88],"brute":[39],"force":[40],"way.":[41],"However,":[42],"former":[44],"often":[45],"leads":[46],"unacceptable":[48],"time":[50],"explosion":[51],"when":[52],"deploying":[53],"on":[54,61,98,126,161,229],"databases.":[56],"The":[57,152],"latter":[58],"heavily":[59],"relies":[60],"extra":[63],"corpus":[64],"learn":[66],"better":[67],"alignment":[68,148],"feature":[71],"space":[72,142],"while":[73],"obscuring":[74],"contribution":[76],"of":[77,101,118,143,149,158,179],"network":[79],"architecture.":[80],"In":[81],"this":[82,96],"work,":[83],"we":[84,104,130],"aim":[85],"investigate":[87],"trade-off":[89],"balance":[91],"effectiveness":[92,157],"efficiency.":[94],"To":[95],"end,":[97],"premise":[100],"retrieval,":[103],"propose":[105,132],"plug-and-play":[107],"External":[108],"Space":[109],"attention":[110],"Aggregation":[111],"(ESA)":[112],"module":[113],"enable":[115],"element-wise":[116],"fusion":[117],"modal":[119,178],"features":[120],"under":[121],"spatial":[122,128],"dimensional":[123],"attention.":[124],"Based":[125],"flexible":[127],"awareness,":[129],"further":[131,187],"Self-Expanding":[134],"triplet":[135],"Loss":[136],"(SEL)":[137],"expand":[139,188],"representation":[141],"samples":[144],"optimize":[146],"embedding":[150],"space.":[151],"extensive":[153],"experiments":[154],"demonstrate":[155],"our":[159,171,183,212],"method":[160,198],"two":[162],"benchmark":[163],"datasets.":[164],"With":[165],"identical":[166],"visual":[167],"textual":[169],"backbones,":[170],"single":[172],"model":[173,185],"has":[174],"outperformed":[175],"ensemble":[177,184],"similar":[180],"methods,":[181],"can":[186],"advantage.":[190],"Meanwhile,":[191],"compared":[192],"with":[193],"embedding-base":[197],"that":[199],"used":[200],"<inline-formula":[201,222],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[202,223,240],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">":[203,224],"<tex-math":[204,225],"notation=\"LaTeX\">$83\\times":[205],"$":[206,227],"</tex-math></inline-formula>":[207,228],"pairs":[209],"than":[210],"ours,":[211],"approach":[213],"not":[214],"only":[215],"surpasses":[216],"performance":[218],"but":[219],"also":[220],"accelerates":[221],"notation=\"LaTeX\">$3\\times":[226],"time.":[231],"Codes":[232],"pre-trained":[234],"models":[235],"are":[236],"available":[237],"at":[238],"<uri":[239],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">https://github.com/KevinLight831/ESA</uri>":[241],".":[242]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":22},{"year":2024,"cited_by_count":17},{"year":2023,"cited_by_count":3}],"updated_date":"2026-03-27T14:29:43.386196","created_date":"2025-10-10T00:00:00"}
