{"id":"https://openalex.org/W4377236344","doi":"https://doi.org/10.1145/3581807.3581857","title":"Semantic Maximum Relevance and Modal Alignment for Cross-Modal Retrieval","display_name":"Semantic Maximum Relevance and Modal Alignment for Cross-Modal Retrieval","publication_year":2022,"publication_date":"2022-11-17","ids":{"openalex":"https://openalex.org/W4377236344","doi":"https://doi.org/10.1145/3581807.3581857"},"language":"en","primary_location":{"id":"doi:10.1145/3581807.3581857","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3581807.3581857","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2022 11th International Conference on Computing and Pattern Recognition","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101612132","display_name":"Pingping Sun","orcid":"https://orcid.org/0000-0001-7626-8167"},"institutions":[{"id":"https://openalex.org/I5343935","display_name":"Guilin University of Electronic Technology","ror":"https://ror.org/05arjae42","country_code":"CN","type":"education","lineage":["https://openalex.org/I5343935"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Pingping Sun","raw_affiliation_strings":["Guangxi Key Laboratory of Image and Graphic Intelligent Processing, Guilin University of Electronic Technology, China"],"raw_orcid":"https://orcid.org/0000-0001-7626-8167","affiliations":[{"raw_affiliation_string":"Guangxi Key Laboratory of Image and Graphic Intelligent Processing, Guilin University of Electronic Technology, China","institution_ids":["https://openalex.org/I5343935"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008213911","display_name":"Baohua Qiang","orcid":"https://orcid.org/0000-0002-3469-6590"},"institutions":[{"id":"https://openalex.org/I5343935","display_name":"Guilin University of Electronic Technology","ror":"https://ror.org/05arjae42","country_code":"CN","type":"education","lineage":["https://openalex.org/I5343935"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Baohua Qiang","raw_affiliation_strings":["Guangxi Key Laboratory of Image and Graphic Intelligent Processing, Guilin University of Electronic Technology, China"],"raw_orcid":"https://orcid.org/0000-0002-3469-6590","affiliations":[{"raw_affiliation_string":"Guangxi Key Laboratory of Image and Graphic Intelligent Processing, Guilin University of Electronic Technology, China","institution_ids":["https://openalex.org/I5343935"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081673517","display_name":"Zhiguang Liu","orcid":"https://orcid.org/0000-0001-5454-9435"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhiguang Liu","raw_affiliation_strings":["Hebei Key Laboratory of Intelligent Information Perception and Processing, The 54th Research Institute of CETC, China"],"raw_orcid":"https://orcid.org/0000-0001-5454-9435","affiliations":[{"raw_affiliation_string":"Hebei Key Laboratory of Intelligent Information Perception and Processing, The 54th Research Institute of CETC, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067186052","display_name":"Xianyi Yang","orcid":"https://orcid.org/0000-0002-9026-7934"},"institutions":[{"id":"https://openalex.org/I5343935","display_name":"Guilin University of Electronic Technology","ror":"https://ror.org/05arjae42","country_code":"CN","type":"education","lineage":["https://openalex.org/I5343935"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xianyi Yang","raw_affiliation_strings":["Guangxi Key Laboratory of Image and Graphic Intelligent Processing, Guilin University of Electronic Technology, China"],"raw_orcid":"https://orcid.org/0000-0002-9026-7934","affiliations":[{"raw_affiliation_string":"Guangxi Key Laboratory of Image and Graphic Intelligent Processing, Guilin University of Electronic Technology, China","institution_ids":["https://openalex.org/I5343935"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079435030","display_name":"Guangyong Xi","orcid":"https://orcid.org/0000-0002-1163-4565"},"institutions":[{"id":"https://openalex.org/I5343935","display_name":"Guilin University of Electronic Technology","ror":"https://ror.org/05arjae42","country_code":"CN","type":"education","lineage":["https://openalex.org/I5343935"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guangyong Xi","raw_affiliation_strings":["Guangxi Key Laboratory of Image and Graphic Intelligent Processing, Guilin University of Electronic Technology, China"],"raw_orcid":"https://orcid.org/0000-0002-1163-4565","affiliations":[{"raw_affiliation_string":"Guangxi Key Laboratory of Image and Graphic Intelligent Processing, Guilin University of Electronic Technology, China","institution_ids":["https://openalex.org/I5343935"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101944075","display_name":"Weigang Liu","orcid":"https://orcid.org/0000-0002-6916-9950"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Weigang Liu","raw_affiliation_strings":["Hebei Key Laboratory of Intelligent Information Perception and Processing, The 54th Research Institute of CETC, China"],"raw_orcid":"https://orcid.org/0000-0002-6916-9950","affiliations":[{"raw_affiliation_string":"Hebei Key Laboratory of Intelligent Information Perception and Processing, The 54th Research Institute of CETC, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101893411","display_name":"Ruidong Chen","orcid":"https://orcid.org/0000-0002-3083-014X"},"institutions":[{"id":"https://openalex.org/I5343935","display_name":"Guilin University of Electronic Technology","ror":"https://ror.org/05arjae42","country_code":"CN","type":"education","lineage":["https://openalex.org/I5343935"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ruidong Chen","raw_affiliation_strings":["Guangxi Key Laboratory of Image and Graphic Intelligent Processing, Guilin University of Electronic Technology, China"],"raw_orcid":"https://orcid.org/0000-0002-3083-014X","affiliations":[{"raw_affiliation_string":"Guangxi Key Laboratory of Image and Graphic Intelligent Processing, Guilin University of Electronic Technology, China","institution_ids":["https://openalex.org/I5343935"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100624685","display_name":"Shihao Zhang","orcid":"https://orcid.org/0000-0001-6489-3694"},"institutions":[{"id":"https://openalex.org/I5343935","display_name":"Guilin University of Electronic Technology","ror":"https://ror.org/05arjae42","country_code":"CN","type":"education","lineage":["https://openalex.org/I5343935"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shihao Zhang","raw_affiliation_strings":["Guangxi Key Laboratory of Image and Graphic Intelligent Processing, Guilin University of Electronic Technology, China"],"raw_orcid":"https://orcid.org/0000-0001-6489-3694","affiliations":[{"raw_affiliation_string":"Guangxi Key Laboratory of Image and Graphic Intelligent Processing, Guilin University of Electronic Technology, China","institution_ids":["https://openalex.org/I5343935"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5101612132"],"corresponding_institution_ids":["https://openalex.org/I5343935"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.17262231,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"347","last_page":"354"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9930999875068665,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.7715966701507568},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6975338459014893},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6265875101089478},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5837002992630005},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.5258525013923645},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5033580660820007},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.47957056760787964},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4536728858947754},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4496862292289734},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.4383566379547119},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.42638644576072693},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.4253726005554199},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.33478376269340515},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.27680638432502747}],"concepts":[{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.7715966701507568},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6975338459014893},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6265875101089478},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5837002992630005},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.5258525013923645},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5033580660820007},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47957056760787964},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4536728858947754},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4496862292289734},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.4383566379547119},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.42638644576072693},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.4253726005554199},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.33478376269340515},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.27680638432502747},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3581807.3581857","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3581807.3581857","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2022 11th International Conference on Computing and Pattern Recognition","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W639218804","https://openalex.org/W1964073652","https://openalex.org/W2007972815","https://openalex.org/W2013535308","https://openalex.org/W2106277773","https://openalex.org/W2138118304","https://openalex.org/W2194775991","https://openalex.org/W2605649771","https://openalex.org/W2765440071","https://openalex.org/W2909695232","https://openalex.org/W2913688275","https://openalex.org/W2969985801","https://openalex.org/W3135367836"],"related_works":["https://openalex.org/W2378211422","https://openalex.org/W2745001401","https://openalex.org/W4321353415","https://openalex.org/W2130974462","https://openalex.org/W972276598","https://openalex.org/W4246352526","https://openalex.org/W2028665553","https://openalex.org/W2086519370","https://openalex.org/W2087343574","https://openalex.org/W2121910908"],"abstract_inverted_index":{"With":[0],"the":[1,11,43,53,62,67,72,77,90,109,129,144],"increasing":[2,113],"abundance":[3],"of":[4,55],"multimedia":[5],"data":[6],"resources,":[7],"researches":[8],"on":[9,123],"mining":[10],"relationship":[12],"between":[13,66,99],"different":[14],"modalities":[15],"to":[16,51,89,107,143],"achieve":[17],"refined":[18],"cross-modal":[19,136,146],"retrieval":[20,137,147],"are":[21],"gradually":[22],"emerging.":[23],"In":[24,83],"this":[25],"paper,":[26],"we":[27],"propose":[28],"a":[29],"novel":[30],"Semantic":[31],"Maximum":[32],"Relevance":[33],"and":[34,59,76,93,102,117,139],"Modal":[35],"Alignment":[36],"(SMR-MA)":[37],"for":[38],"Cross-Modal":[39],"Retrieval,":[40],"which":[41],"utilizes":[42],"pre-trained":[44],"model":[45],"with":[46,80],"abundant":[47],"image":[48,57],"text":[49],"information":[50,64],"extract":[52],"features":[54],"each":[56],"text,":[58],"further":[60],"promotes":[61],"modal":[63,73],"interaction":[65],"same":[68],"semantic":[69],"categories":[70],"through":[71],"alignment":[74],"module":[75],"multi-layer":[78],"perceptron":[79],"shared":[81],"weights.":[82],"addition,":[84],"multi-modal":[85],"embedding":[86,101],"is":[87,97,140],"distributed":[88],"normalized":[91],"hypersphere,":[92],"angular":[94,105],"edge":[95],"penalty":[96],"applied":[98],"feature":[100],"weight":[103],"in":[104,135],"space":[106],"maximize":[108],"classification":[110],"boundary,":[111],"thus":[112],"both":[114],"intra-class":[115],"similarity":[116],"inter-class":[118],"difference.":[119],"Comprehensive":[120],"analysis":[121],"experiments":[122],"three":[124],"benchmark":[125],"datasets":[126],"demonstrate":[127],"that":[128],"proposed":[130],"method":[131],"has":[132],"superior":[133,142],"performance":[134],"tasks":[138],"significantly":[141],"state-of-the-art":[145],"methods.":[148]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
