{"id":"https://openalex.org/W4381327576","doi":"https://doi.org/10.1109/tip.2023.3286710","title":"Efficient Token-Guided Image-Text Retrieval With Consistent Multimodal Contrastive Training","display_name":"Efficient Token-Guided Image-Text Retrieval With Consistent Multimodal Contrastive Training","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4381327576","doi":"https://doi.org/10.1109/tip.2023.3286710","pmid":"https://pubmed.ncbi.nlm.nih.gov/37339023"},"language":"en","primary_location":{"id":"doi:10.1109/tip.2023.3286710","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tip.2023.3286710","pdf_url":null,"source":{"id":"https://openalex.org/S4210173141","display_name":"IEEE Transactions on Image Processing","issn_l":"1057-7149","issn":["1057-7149","1941-0042"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Image Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100412247","display_name":"Chong Liu","orcid":"https://orcid.org/0000-0003-1180-3362"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210128818","display_name":"Institute of Software","ror":"https://ror.org/033dfsn42","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Chong Liu","raw_affiliation_strings":["State Key Laboratory of Computer Science, Institute of Software, Chinese Academy of Sciences, Beijing, China","Institute of Software, State Key Laboratory of Computer Science, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Computer Science, Institute of Software, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210128818","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Institute of Software, State Key Laboratory of Computer Science, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210128818","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100340299","display_name":"Yuqi Zhang","orcid":"https://orcid.org/0000-0001-7094-3838"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuqi Zhang","raw_affiliation_strings":["Alibaba Group, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Alibaba Group, Beijing, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014269015","display_name":"Hongsong Wang","orcid":"https://orcid.org/0000-0002-9464-1778"},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongsong Wang","raw_affiliation_strings":["Department of Computer Science and Engineering, Southeast University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Southeast University, Nanjing, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100625514","display_name":"Weihua Chen","orcid":"https://orcid.org/0000-0003-4141-7833"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weihua Chen","raw_affiliation_strings":["Alibaba Group, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Alibaba Group, Beijing, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100719173","display_name":"Fan Wang","orcid":"https://orcid.org/0000-0001-7320-1119"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fan Wang","raw_affiliation_strings":["Alibaba Group, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Alibaba Group, Beijing, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101550628","display_name":"Yan Huang","orcid":"https://orcid.org/0000-0002-8239-7229"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yan Huang","raw_affiliation_strings":["Center for Research on Intelligent Perception and Computing (CRIPAC), National Laboratory of Pattern Recognition (NLPR), Institute of Automation, Chinese Academy of Sciences (CASIA), Beijing, China","Institute of Automation, Center for Research on Intelligent Perception and Computing (CRIPAC), National Laboratory of Pattern Recognition (NLPR), Chinese Academy of Sciences (CASIA), Beijing, China"],"affiliations":[{"raw_affiliation_string":"Center for Research on Intelligent Perception and Computing (CRIPAC), National Laboratory of Pattern Recognition (NLPR), Institute of Automation, Chinese Academy of Sciences (CASIA), Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Institute of Automation, Center for Research on Intelligent Perception and Computing (CRIPAC), National Laboratory of Pattern Recognition (NLPR), Chinese Academy of Sciences (CASIA), Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102819706","display_name":"Yi-Dong Shen","orcid":"https://orcid.org/0009-0006-5782-2429"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210128818","display_name":"Institute of Software","ror":"https://ror.org/033dfsn42","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi-Dong Shen","raw_affiliation_strings":["State Key Laboratory of Computer Science, Institute of Software, Chinese Academy of Sciences, Beijing, China","Institute of Software, State Key Laboratory of Computer Science, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Computer Science, Institute of Software, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210128818","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Institute of Software, State Key Laboratory of Computer Science, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210128818","https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5115602506","display_name":"Liang Wang","orcid":"https://orcid.org/0000-0001-5224-8647"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liang Wang","raw_affiliation_strings":["Center for Research on Intelligent Perception and Computing (CRIPAC), National Laboratory of Pattern Recognition (NLPR), Institute of Automation, Chinese Academy of Sciences (CASIA), Beijing, China","Institute of Automation, Center for Research on Intelligent Perception and Computing (CRIPAC), National Laboratory of Pattern Recognition (NLPR), Chinese Academy of Sciences (CASIA), Beijing, China"],"affiliations":[{"raw_affiliation_string":"Center for Research on Intelligent Perception and Computing (CRIPAC), National Laboratory of Pattern Recognition (NLPR), Institute of Automation, Chinese Academy of Sciences (CASIA), Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Institute of Automation, Center for Research on Intelligent Perception and Computing (CRIPAC), National Laboratory of Pattern Recognition (NLPR), Chinese Academy of Sciences (CASIA), Beijing, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5100412247"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210128818"],"apc_list":null,"apc_paid":null,"fwci":5.6156,"has_fulltext":false,"cited_by_count":47,"citation_normalized_percentile":{"value":0.97151023,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":"32","issue":null,"first_page":"3622","last_page":"3633"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7260090112686157},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6161367893218994},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.6131250262260437},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.5535974502563477},{"id":"https://openalex.org/keywords/image-processing","display_name":"Image processing","score":0.47722843289375305},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.47012317180633545},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.443543940782547},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.41514843702316284},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.4058798551559448},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4038926362991333},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3949935734272003}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7260090112686157},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6161367893218994},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.6131250262260437},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.5535974502563477},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.47722843289375305},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.47012317180633545},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.443543940782547},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.41514843702316284},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.4058798551559448},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4038926362991333},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3949935734272003},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tip.2023.3286710","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tip.2023.3286710","pdf_url":null,"source":{"id":"https://openalex.org/S4210173141","display_name":"IEEE Transactions on Image Processing","issn_l":"1057-7149","issn":["1057-7149","1941-0042"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Image Processing","raw_type":"journal-article"},{"id":"pmid:37339023","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/37339023","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on image processing : a publication of the IEEE Signal Processing Society","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.7699999809265137}],"awards":[{"id":"https://openalex.org/G1383591865","display_name":null,"funder_award_id":"62276261","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3388623350","display_name":null,"funder_award_id":"2022ZD0117900","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G5156147101","display_name":null,"funder_award_id":"62236010","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":83,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1861492603","https://openalex.org/W2185175083","https://openalex.org/W2481240925","https://openalex.org/W2606473278","https://openalex.org/W2745461083","https://openalex.org/W2774267535","https://openalex.org/W2778940641","https://openalex.org/W2803158089","https://openalex.org/W2883311563","https://openalex.org/W2896457183","https://openalex.org/W2910453440","https://openalex.org/W2943911770","https://openalex.org/W2962964995","https://openalex.org/W2963350250","https://openalex.org/W2963389687","https://openalex.org/W2963966654","https://openalex.org/W2964157791","https://openalex.org/W2966715458","https://openalex.org/W2968124245","https://openalex.org/W2970231061","https://openalex.org/W2972073579","https://openalex.org/W2975813532","https://openalex.org/W2988823324","https://openalex.org/W2994818707","https://openalex.org/W2997591391","https://openalex.org/W2998356391","https://openalex.org/W3014611590","https://openalex.org/W3017098848","https://openalex.org/W3035454331","https://openalex.org/W3035688398","https://openalex.org/W3090449556","https://openalex.org/W3091588028","https://openalex.org/W3092820619","https://openalex.org/W3094172275","https://openalex.org/W3107593279","https://openalex.org/W3110536152","https://openalex.org/W3118694826","https://openalex.org/W3126337491","https://openalex.org/W3126792443","https://openalex.org/W3130289102","https://openalex.org/W3166304536","https://openalex.org/W3166396011","https://openalex.org/W3169472988","https://openalex.org/W3171668871","https://openalex.org/W3173220247","https://openalex.org/W3173909648","https://openalex.org/W3175888430","https://openalex.org/W3176013197","https://openalex.org/W3177343494","https://openalex.org/W3184735396","https://openalex.org/W3184784418","https://openalex.org/W3209163491","https://openalex.org/W3212024868","https://openalex.org/W3212436340","https://openalex.org/W3212610063","https://openalex.org/W3213100861","https://openalex.org/W4200498145","https://openalex.org/W4206314411","https://openalex.org/W4214819138","https://openalex.org/W4282937133","https://openalex.org/W4283030109","https://openalex.org/W4284697766","https://openalex.org/W4306802920","https://openalex.org/W4312877428","https://openalex.org/W4313178921","https://openalex.org/W6639102338","https://openalex.org/W6747225742","https://openalex.org/W6755207826","https://openalex.org/W6762175885","https://openalex.org/W6766904570","https://openalex.org/W6767279747","https://openalex.org/W6775188310","https://openalex.org/W6779473860","https://openalex.org/W6789753369","https://openalex.org/W6790019176","https://openalex.org/W6791353385","https://openalex.org/W6798805250","https://openalex.org/W6803448863","https://openalex.org/W6804198819","https://openalex.org/W6838679761","https://openalex.org/W6839167420","https://openalex.org/W6846306129"],"related_works":["https://openalex.org/W4388335561","https://openalex.org/W2970530566","https://openalex.org/W2967478618","https://openalex.org/W2997152889","https://openalex.org/W4385572700","https://openalex.org/W4307309205","https://openalex.org/W4288261899","https://openalex.org/W4385009901","https://openalex.org/W230091440","https://openalex.org/W4285141722"],"abstract_inverted_index":{"Image-text":[0],"retrieval":[1,69,83,95,180,232],"is":[2,113,156,192,246],"a":[3,74,97,108,138,170,214],"central":[4],"problem":[5],"for":[6,20,62,67,150,158],"understanding":[7],"the":[8,18,35,43,54,124,132,176,197,208,220,227],"semantic":[9,133,201],"relationship":[10],"between":[11,45,57,203],"vision":[12],"and":[13,15,23,38,50,59,103,127,152,166,173,199,205,223],"language,":[14],"serves":[16],"as":[17,118],"basis":[19],"various":[21],"visual":[22],"language":[24],"tasks.":[25],"Most":[26],"previous":[27,77],"works":[28,78],"either":[29],"simply":[30],"learn":[31],"coarse-grained":[32],"representations":[33,61],"of":[34,146,178],"overall":[36],"image":[37,46,151],"text,":[39],"or":[40,48,85],"elaborately":[41],"establish":[42],"correspondence":[44],"regions":[47],"pixels":[49],"text":[51,153],"words.":[52],"However,":[53],"close":[55],"relations":[56],"coarse-":[58,102,165],"fine-grained":[60,104,167],"each":[63],"modality":[64],"are":[65],"important":[66],"image-text":[68,94,159],"but":[70],"almost":[71],"neglected.":[72],"As":[73],"result,":[75],"such":[76],"inevitably":[79],"suffer":[80],"from":[81,96],"low":[82,236],"accuracy":[84],"heavy":[86],"computational":[87],"cost.":[88],"In":[89],"this":[90,136],"work,":[91],"we":[92],"address":[93],"novel":[98,183],"perspective":[99],"by":[100],"combining":[101],"representation":[105],"learning":[106],"into":[107,169],"unified":[109,171],"framework.":[110],"This":[111],"framework":[112,172],"consistent":[114],"with":[115,213,234,241],"human":[116],"cognition,":[117],"humans":[119],"simultaneously":[120],"pay":[121],"attention":[122],"to":[123,130,195],"entire":[125],"sample":[126],"regional":[128],"elements":[129],"understand":[131],"content.":[134],"To":[135],"end,":[137],"Token-Guided":[139],"Dual":[140],"Transformer":[141],"(TGDT)":[142],"architecture":[143],"which":[144],"consists":[145],"two":[147],"homogeneous":[148],"branches":[149],"modalities,":[154],"respectively,":[155],"proposed":[157,193,228],"retrieval.":[160],"The":[161],"TGDT":[162],"incorporates":[163],"both":[164,179],"retrievals":[168],"beneficially":[174],"leverages":[175],"advantages":[177],"approaches.":[181,244],"A":[182],"training":[184],"objective":[185],"called":[186],"Consistent":[187],"Multimodal":[188],"Contrastive":[189],"(CMC)":[190],"loss":[191],"accordingly":[194],"ensure":[196],"intra-":[198],"inter-modal":[200],"consistencies":[202],"images":[204],"texts":[206],"in":[207],"common":[209],"embedding":[210],"space.":[211],"Equipped":[212],"two-stage":[215],"inference":[216,237],"method":[217,229],"based":[218],"on":[219],"mixed":[221],"global":[222],"local":[224],"cross-modal":[225],"similarity,":[226],"achieves":[230],"state-of-the-art":[231],"performances":[233],"extremely":[235],"time":[238],"when":[239],"compared":[240],"representative":[242],"recent":[243],"Code":[245],"publicly":[247],"available:":[248],"github.com/LCFractal/TGDT.":[249]},"counts_by_year":[{"year":2026,"cited_by_count":6},{"year":2025,"cited_by_count":24},{"year":2024,"cited_by_count":15},{"year":2023,"cited_by_count":2}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
