{"id":"https://openalex.org/W4415540701","doi":"https://doi.org/10.1145/3746027.3755648","title":"Cross-Modal Retrieval with Cauchy-Schwarz Divergence","display_name":"Cross-Modal Retrieval with Cauchy-Schwarz Divergence","publication_year":2025,"publication_date":"2025-10-25","ids":{"openalex":"https://openalex.org/W4415540701","doi":"https://doi.org/10.1145/3746027.3755648"},"language":"en","primary_location":{"id":"doi:10.1145/3746027.3755648","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3755648","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111299124","display_name":"Jiahao Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I139024713","display_name":"Guangdong University of Technology","ror":"https://ror.org/04azbjn80","country_code":"CN","type":"education","lineage":["https://openalex.org/I139024713"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jiahao Zhang","raw_affiliation_strings":["The HongKong University of Science and Technology (Guangzhou), Guangzhou, Guangdong, China"],"affiliations":[{"raw_affiliation_string":"The HongKong University of Science and Technology (Guangzhou), Guangzhou, Guangdong, China","institution_ids":["https://openalex.org/I139024713"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061380168","display_name":"Wenzhe Yin","orcid":"https://orcid.org/0000-0003-3669-9987"},"institutions":[{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]},{"id":"https://openalex.org/I4210135670","display_name":"Amsterdam University of the Arts","ror":"https://ror.org/04dde1554","country_code":"NL","type":"education","lineage":["https://openalex.org/I4210135670"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Wenzhe Yin","raw_affiliation_strings":["University of Amsterdam, Amsterdam, Netherlands"],"affiliations":[{"raw_affiliation_string":"University of Amsterdam, Amsterdam, Netherlands","institution_ids":["https://openalex.org/I4210135670","https://openalex.org/I887064364"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5041358503","display_name":"Shujian Yu","orcid":"https://orcid.org/0000-0002-6385-1705"},"institutions":[{"id":"https://openalex.org/I865915315","display_name":"Vrije Universiteit Amsterdam","ror":"https://ror.org/008xxew50","country_code":"NL","type":"education","lineage":["https://openalex.org/I865915315"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Shujian Yu","raw_affiliation_strings":["Vrije Universiteit Amsterdam, Amsterdam, Netherlands"],"affiliations":[{"raw_affiliation_string":"Vrije Universiteit Amsterdam, Amsterdam, Netherlands","institution_ids":["https://openalex.org/I865915315"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5111299124"],"corresponding_institution_ids":["https://openalex.org/I139024713"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.32665735,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"2064","last_page":"2073"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/divergence","display_name":"Divergence (linguistics)","score":0.6955000162124634},{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.6937999725341797},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6287000179290771},{"id":"https://openalex.org/keywords/measure","display_name":"Measure (data warehouse)","score":0.5037000179290771},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.49729999899864197},{"id":"https://openalex.org/keywords/extension","display_name":"Extension (predicate logic)","score":0.44519999623298645},{"id":"https://openalex.org/keywords/sensitivity","display_name":"Sensitivity (control systems)","score":0.41429999470710754},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4124999940395355}],"concepts":[{"id":"https://openalex.org/C207390915","wikidata":"https://www.wikidata.org/wiki/Q1230525","display_name":"Divergence (linguistics)","level":2,"score":0.6955000162124634},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.6937999725341797},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6287000179290771},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6159999966621399},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.5037000179290771},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.49729999899864197},{"id":"https://openalex.org/C2778029271","wikidata":"https://www.wikidata.org/wiki/Q5421931","display_name":"Extension (predicate logic)","level":2,"score":0.44519999623298645},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4399000108242035},{"id":"https://openalex.org/C21200559","wikidata":"https://www.wikidata.org/wiki/Q7451068","display_name":"Sensitivity (control systems)","level":2,"score":0.41429999470710754},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4124999940395355},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.39820000529289246},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.38449999690055847},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.38260000944137573},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3686000108718872},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.3328000009059906},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.30570000410079956},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2962000072002411},{"id":"https://openalex.org/C25343380","wikidata":"https://www.wikidata.org/wiki/Q277521","display_name":"Relation (database)","level":2,"score":0.2874000072479248},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.27709999680519104},{"id":"https://openalex.org/C81793267","wikidata":"https://www.wikidata.org/wiki/Q7180962","display_name":"Phase retrieval","level":3,"score":0.2770000100135803},{"id":"https://openalex.org/C171752962","wikidata":"https://www.wikidata.org/wiki/Q255166","display_name":"Kullback\u2013Leibler divergence","level":2,"score":0.2696000039577484},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.26570001244544983},{"id":"https://openalex.org/C117220453","wikidata":"https://www.wikidata.org/wiki/Q5172842","display_name":"Correlation","level":2,"score":0.26489999890327454},{"id":"https://openalex.org/C2639959","wikidata":"https://www.wikidata.org/wiki/Q1344778","display_name":"Distance measures","level":2,"score":0.2603999972343445}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3746027.3755648","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746027.3755648","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},{"id":"pmh:oai:research.vu.nl:openaire_cris_publications/ba6fe521-3aaf-4731-8b34-31ba01b1ba46","is_oa":false,"landing_page_url":"https://research.vu.nl/en/publications/ba6fe521-3aaf-4731-8b34-31ba01b1ba46","pdf_url":null,"source":{"id":"https://openalex.org/S4306401107","display_name":"VU Research Portal","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I865915315","host_organization_name":"Vrije Universiteit Amsterdam","host_organization_lineage":["https://openalex.org/I865915315"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Zhang, J, Yin, W & Yu, S 2025, Cross-Modal Retrieval with Cauchy-Schwarz Divergence. in MM '25: Proceedings of the 33rd ACM International Conference on Multimedia. Association for Computing Machinery, Inc, pp. 2064-2073, 33rd ACM International Conference on Multimedia, MM 2025, Dublin, Ireland, 27/10/25. https://doi.org/10.1145/3746027.3755648","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:research.vu.nl:publications/ba6fe521-3aaf-4731-8b34-31ba01b1ba46","is_oa":false,"landing_page_url":"https://hdl.handle.net/1871.1/ba6fe521-3aaf-4731-8b34-31ba01b1ba46","pdf_url":null,"source":{"id":"https://openalex.org/S4306401107","display_name":"VU Research Portal","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I865915315","host_organization_name":"Vrije Universiteit Amsterdam","host_organization_lineage":["https://openalex.org/I865915315"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Zhang, J, Yin, W & Yu, S 2025, Cross-Modal Retrieval with Cauchy-Schwarz Divergence. in MM '25: Proceedings of the 33rd ACM International Conference on Multimedia. Association for Computing Machinery, Inc, pp. 2064-2073, 33rd ACM International Conference on Multimedia, MM 2025, Dublin, Ireland, 27/10/25. https://doi.org/10.1145/3746027.3755648","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W1922199343","https://openalex.org/W1949478088","https://openalex.org/W2007972815","https://openalex.org/W2013535308","https://openalex.org/W2031774932","https://openalex.org/W2033406799","https://openalex.org/W2053667957","https://openalex.org/W2087501015","https://openalex.org/W2106277773","https://openalex.org/W2185175083","https://openalex.org/W2266728343","https://openalex.org/W2605649771","https://openalex.org/W2765440071","https://openalex.org/W2789933974","https://openalex.org/W2886281300","https://openalex.org/W2894786240","https://openalex.org/W2963275094","https://openalex.org/W2963288100","https://openalex.org/W2963449390","https://openalex.org/W2964216321","https://openalex.org/W2967957126","https://openalex.org/W3087516599","https://openalex.org/W3093291618","https://openalex.org/W3111129280","https://openalex.org/W3131455030","https://openalex.org/W3140270772","https://openalex.org/W4313145975","https://openalex.org/W4372260310","https://openalex.org/W4386065462","https://openalex.org/W4386075677","https://openalex.org/W4389257994","https://openalex.org/W4391053092","https://openalex.org/W4395663526","https://openalex.org/W4407354906","https://openalex.org/W4408296193","https://openalex.org/W4413145317"],"related_works":[],"abstract_inverted_index":{"Effective":[0],"cross-modal":[1],"retrieval":[2,16,78,138],"requires":[3],"robust":[4],"alignment":[5,22,97],"of":[6,56,98,130,142],"heterogeneous":[7],"data":[8],"types.":[9],"Most":[10],"existing":[11],"methods":[12,36],"focus":[13],"on":[14,20,123],"bi-modal":[15,135],"tasks":[17],"and":[18,31,48,77,136],"rely":[19],"distributional":[21],"techniques":[23],"such":[24],"as":[25],"Kullback-Leibler":[26],"divergence,":[27,68],"Maximum":[28],"Mean":[29],"Discrepancy,":[30],"correlation":[32],"alignment.":[33],"However,":[34],"these":[35],"often":[37],"suffer":[38],"from":[39],"critical":[40],"limitations,":[41],"including":[42],"numerical":[43],"instability,":[44],"sensitivity":[45],"to":[46,51],"hyperparameters,":[47],"their":[49],"inability":[50],"capture":[52],"the":[53,57,65,115,128],"full":[54],"structure":[55],"underlying":[58],"distributions.":[59],"In":[60],"this":[61],"paper,":[62],"we":[63],"introduce":[64],"Cauchy-Schwarz":[66],"(CS)":[67],"a":[69,83,104,109],"hyperparameter-free":[70],"measure":[71],"that":[72],"improves":[73],"both":[74,134],"training":[75],"stability":[76],"performance.":[79],"We":[80],"further":[81],"propose":[82],"novel":[84],"Generalized":[85],"CS":[86],"(GCS)":[87],"divergence":[88,145],"inspired":[89],"by":[90],"Holder's":[91],"inequality.":[92],"This":[93],"extension":[94],"enables":[95],"direct":[96],"three":[99],"or":[100],"more":[101],"modalities":[102],"within":[103],"unified":[105],"mathematical":[106],"framework":[107],"through":[108],"bidirectional":[110],"circular":[111],"comparison":[112],"scheme,":[113],"eliminating":[114],"need":[116],"for":[117],"exhaustive":[118],"pairwise":[119],"comparisons.":[120],"Extensive":[121],"experiments":[122],"six":[124],"benchmark":[125],"datasets":[126],"demonstrate":[127],"effectiveness":[129],"our":[131,143],"method":[132],"in":[133],"tri-modal":[137],"tasks.":[139],"The":[140],"code":[141],"CS/GCS":[144],"is":[146],"publicly":[147],"available":[148],"at":[149],"https://github.com/JiahaoZhang666/CSD.":[150]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-25T00:00:00"}
