{"id":"https://openalex.org/W4200245408","doi":"https://doi.org/10.1145/3487075.3487167","title":"Super Visual Semantic Embedding for Cross-Modal Image-Text Retrieval","display_name":"Super Visual Semantic Embedding for Cross-Modal Image-Text Retrieval","publication_year":2021,"publication_date":"2021-10-19","ids":{"openalex":"https://openalex.org/W4200245408","doi":"https://doi.org/10.1145/3487075.3487167"},"language":"en","primary_location":{"id":"doi:10.1145/3487075.3487167","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3487075.3487167","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 5th International Conference on Computer Science and Application Engineering","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5035305012","display_name":"Zhixian Zeng","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhixian Zeng","raw_affiliation_strings":["Department of The Sixty-third Research Institute, University of National University of Defense Technology, China"],"affiliations":[{"raw_affiliation_string":"Department of The Sixty-third Research Institute, University of National University of Defense Technology, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103035568","display_name":"Jianjun Cao","orcid":"https://orcid.org/0000-0002-4281-8324"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianjun Cao","raw_affiliation_strings":["Department of The Sixty-third Research Institute, University of National University of Defense Technology, China"],"affiliations":[{"raw_affiliation_string":"Department of The Sixty-third Research Institute, University of National University of Defense Technology, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002373133","display_name":"Guoquan Jiang","orcid":"https://orcid.org/0000-0002-8729-0017"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guoquan Jiang","raw_affiliation_strings":["Department of The Sixty-third Research Institute, University of National University of Defense Technology, China"],"affiliations":[{"raw_affiliation_string":"Department of The Sixty-third Research Institute, University of National University of Defense Technology, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047466858","display_name":"Nianfeng Weng","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Nianfeng Weng","raw_affiliation_strings":["Department of The Sixty-third Research Institute, University of National University of Defense Technology, China"],"affiliations":[{"raw_affiliation_string":"Department of The Sixty-third Research Institute, University of National University of Defense Technology, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101560065","display_name":"Yuxin Xu","orcid":"https://orcid.org/0000-0003-1995-8334"},"institutions":[{"id":"https://openalex.org/I200845125","display_name":"Nanjing University of Information Science and Technology","ror":"https://ror.org/02y0rxk19","country_code":"CN","type":"education","lineage":["https://openalex.org/I200845125"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuxin Xu","raw_affiliation_strings":["School of Computer &amp; Software, Nanjing University of Information Science &amp; Technology, China"],"affiliations":[{"raw_affiliation_string":"School of Computer &amp; Software, Nanjing University of Information Science &amp; Technology, China","institution_ids":["https://openalex.org/I200845125"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5074156874","display_name":"Zibo Nie","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zibo Nie","raw_affiliation_strings":["Department of The Sixty-third Research Institute, University of National University of Defense Technology, China"],"affiliations":[{"raw_affiliation_string":"Department of The Sixty-third Research Institute, University of National University of Defense Technology, China","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5035305012"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.16624183,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.7703533172607422},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7467623949050903},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5347819328308105},{"id":"https://openalex.org/keywords/softmax-function","display_name":"Softmax function","score":0.5290600061416626},{"id":"https://openalex.org/keywords/word-embedding","display_name":"Word embedding","score":0.47561389207839966},{"id":"https://openalex.org/keywords/semantic-feature","display_name":"Semantic feature","score":0.4487537145614624},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.4449481964111328},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.44307228922843933},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.4262595772743225},{"id":"https://openalex.org/keywords/pooling","display_name":"Pooling","score":0.41126880049705505},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.41023287177085876},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.21976974606513977}],"concepts":[{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.7703533172607422},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7467623949050903},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5347819328308105},{"id":"https://openalex.org/C188441871","wikidata":"https://www.wikidata.org/wiki/Q7554146","display_name":"Softmax function","level":3,"score":0.5290600061416626},{"id":"https://openalex.org/C2777462759","wikidata":"https://www.wikidata.org/wiki/Q18395344","display_name":"Word embedding","level":3,"score":0.47561389207839966},{"id":"https://openalex.org/C2781122975","wikidata":"https://www.wikidata.org/wiki/Q16928266","display_name":"Semantic feature","level":2,"score":0.4487537145614624},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.4449481964111328},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.44307228922843933},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.4262595772743225},{"id":"https://openalex.org/C70437156","wikidata":"https://www.wikidata.org/wiki/Q7228652","display_name":"Pooling","level":2,"score":0.41126880049705505},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.41023287177085876},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.21976974606513977},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3487075.3487167","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3487075.3487167","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 5th International Conference on Computer Science and Application Engineering","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1527575280","https://openalex.org/W2025341678","https://openalex.org/W2108598243","https://openalex.org/W2185175083","https://openalex.org/W2277195237","https://openalex.org/W2517194566","https://openalex.org/W2519887557","https://openalex.org/W2560662850","https://openalex.org/W2745461083","https://openalex.org/W2896457183","https://openalex.org/W2962964995","https://openalex.org/W2991539277","https://openalex.org/W2997326970","https://openalex.org/W3010363512","https://openalex.org/W3010442962","https://openalex.org/W3106780750","https://openalex.org/W3119356360","https://openalex.org/W6639102338","https://openalex.org/W6687483927","https://openalex.org/W6780839027"],"related_works":["https://openalex.org/W3107204728","https://openalex.org/W4287591324","https://openalex.org/W4226420367","https://openalex.org/W2980176872","https://openalex.org/W2962876041","https://openalex.org/W3134502938","https://openalex.org/W2006017062","https://openalex.org/W2891409152","https://openalex.org/W4387143394","https://openalex.org/W2384506582"],"abstract_inverted_index":{"Visual":[0,63],"semantic":[1,21,124,199],"embedding":[2,22,81,86,94,142,150,178,187],"network":[3,7,23,27,82,100],"or":[4],"cross-modal":[5,25,69,198],"cross-attention":[6,26],"are":[8,173],"usually":[9],"adopted":[10,196],"for":[11,68,122,197],"image-text":[12,70],"retrieval.":[13],"Existing":[14],"works":[15],"have":[16],"confirmed":[17],"that":[18,40],"both":[19],"visual":[20],"and":[24,46,83,212,233],"can":[28],"achieve":[29],"similar":[30],"performance,":[31],"but":[32],"the":[33,54,79,84,89,92,105,109,127,135,145,148,154,169,185,218],"former":[34],"has":[35],"lower":[36],"computational":[37],"complexity":[38],"so":[39],"its":[41,47],"retrieval":[42,229,235],"speed":[43],"is":[44,51,101,131,195],"faster":[45],"engineering":[48],"application":[49],"value":[50],"higher":[52],"than":[53],"latter.":[55],"In":[56,88,144,184],"this":[57],"paper,":[58],"we":[59,112,152],"propose":[60],"a":[61,97,114,140,176,181,189],"Super":[62],"Semantic":[64],"Embedding":[65],"Network":[66],"(SVSEN)":[67],"retrieval,":[71],"which":[72,162],"contains":[73],"two":[74,206],"independent":[75],"branch":[76],"substructures":[77],"including":[78],"image":[80,93,123,136,228],"text":[85,149,234],"network.":[87],"design":[90,113,146],"of":[91,108,147],"network,":[95,151],"firstly,":[96],"feature":[98],"extraction":[99],"employed":[102],"to":[103,133,139,157,175],"extract":[104,158],"fine-grained":[106,137,170],"features":[107,138],"image.":[110],"Then,":[111],"graph":[115],"attention":[116],"mechanism":[117],"module":[118],"with":[119],"residual":[120],"link":[121],"enhancement.":[125],"Finally,":[126,168],"Softmax":[128],"pooling":[129],"strategy":[130],"used":[132,208],"map":[134],"common":[141,177,186],"space.":[143],"use":[153],"pre-trained":[155],"BERT-base-uncased":[156],"context-related":[159],"word":[160,171],"vectors,":[161],"will":[163],"be":[164],"fine-tuned":[165],"in":[166],"training.":[167],"vectors":[172],"mapped":[174],"space":[179],"by":[180,230,236],"maximum":[182],"pooling.":[183],"space,":[188],"soft":[190],"label-based":[191],"triplet":[192],"loss":[193],"function":[194],"alignment":[200],"learning.":[201],"Through":[202],"experimental":[203],"verification":[204],"on":[205,223],"widely":[207],"datasets,":[209],"namely":[210],"MS-COCO":[211],"Flickr-30K,":[213,224],"our":[214,225],"proposed":[215],"SVSEN":[216,226],"achieves":[217],"best":[219],"performance.":[220],"For":[221],"instance,":[222],"outperforms":[227],"3.91%":[231],"relatively":[232,238],"1.96%":[237],"([email":[239],"protected]).":[240]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
