{"id":"https://openalex.org/W3209623075","doi":"https://doi.org/10.1145/3459637.3482158","title":"OTCMR: Bridging Heterogeneity Gap with Optimal Transport for Cross-modal Retrieval","display_name":"OTCMR: Bridging Heterogeneity Gap with Optimal Transport for Cross-modal Retrieval","publication_year":2021,"publication_date":"2021-10-26","ids":{"openalex":"https://openalex.org/W3209623075","doi":"https://doi.org/10.1145/3459637.3482158","mag":"3209623075"},"language":"en","primary_location":{"id":"doi:10.1145/3459637.3482158","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3459637.3482158","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM International Conference on Information &amp; Knowledge Management","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100443403","display_name":"Mingyang Li","orcid":"https://orcid.org/0000-0001-5973-7574"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Mingyang Li","raw_affiliation_strings":["Tsinghua University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088293566","display_name":"Shao\u2010Lun Huang","orcid":"https://orcid.org/0000-0003-2827-4022"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shao-Lun Huang","raw_affiliation_strings":["Tsinghua University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100351849","display_name":"Lin Zhang","orcid":"https://orcid.org/0000-0002-4360-5523"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lin Zhang","raw_affiliation_strings":["Tsinghua University, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Shenzhen, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100443403"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.4803,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.65647059,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"3216","last_page":"3220"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9925000071525574,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.8475911617279053},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7501777410507202},{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.6479994654655457},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.640483021736145},{"id":"https://openalex.org/keywords/semantic-gap","display_name":"Semantic gap","score":0.5781881809234619},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5741697549819946},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.47179144620895386},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.4288046658039093},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4161161780357361},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4030342102050781},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3857171833515167},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3249935507774353},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.18181312084197998}],"concepts":[{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.8475911617279053},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7501777410507202},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.6479994654655457},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.640483021736145},{"id":"https://openalex.org/C86034646","wikidata":"https://www.wikidata.org/wiki/Q474311","display_name":"Semantic gap","level":4,"score":0.5781881809234619},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5741697549819946},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.47179144620895386},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.4288046658039093},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4161161780357361},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4030342102050781},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3857171833515167},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3249935507774353},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.18181312084197998},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3459637.3482158","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3459637.3482158","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM International Conference on Information &amp; Knowledge Management","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6483256435","display_name":null,"funder_award_id":"61807021","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W1594039573","https://openalex.org/W2007972815","https://openalex.org/W2013535308","https://openalex.org/W2029241756","https://openalex.org/W2100235303","https://openalex.org/W2138118304","https://openalex.org/W2164530430","https://openalex.org/W2169067611","https://openalex.org/W2606965845","https://openalex.org/W2765440071","https://openalex.org/W2963187488","https://openalex.org/W2963767133","https://openalex.org/W2967957126","https://openalex.org/W4230927393","https://openalex.org/W4249033934"],"related_works":["https://openalex.org/W4388870064","https://openalex.org/W2210139803","https://openalex.org/W2487162673","https://openalex.org/W4235186151","https://openalex.org/W2793211469","https://openalex.org/W2054685365","https://openalex.org/W2128807628","https://openalex.org/W1968265719","https://openalex.org/W4234264766","https://openalex.org/W2415426693"],"abstract_inverted_index":{"Cross-modal":[0],"retrieval":[1,25,68,137,148,160],"is":[2,26],"a":[3,34,55],"classic":[4],"task":[5],"in":[6,33,66,108,121,158],"the":[7,29,39,44,63,73,86,91,98,109,118,152],"multimedia":[8],"community,":[9],"which":[10,150],"aims":[11],"to":[12,27],"search":[13],"for":[14,38,61,134],"semantically":[15],"similar":[16],"results":[17],"from":[18],"different":[19,81,122],"modalities.":[20],"The":[21],"core":[22],"of":[23,80,101,154],"cross-modal":[24,67,136,147,159],"learn":[28],"most":[30],"correlated":[31],"features":[32],"common":[35,110],"feature":[36,78,92,99,111],"space":[37],"multi-modal":[40,102],"data":[41,103],"so":[42],"that":[43],"similarity":[45],"can":[46,104],"be":[47,105],"directly":[48],"measured.":[49],"In":[50,95,113,139],"this":[51,96],"paper,":[52],"we":[53,71],"propose":[54],"novel":[56],"model":[57,116],"using":[58,155],"optimal":[59,74,156],"transport":[60,75,87,157],"bridging":[62],"heterogeneity":[64],"gap":[65],"tasks.":[69,161],"Specifically,":[70],"calculate":[72],"plans":[76],"between":[77],"distributions":[79,100],"modalities":[82],"and":[83,130],"then":[84],"minimize":[85],"cost":[88],"by":[89],"optimizing":[90],"embedding":[93],"functions.":[94],"way,":[97],"well":[106],"aligned":[107],"space.":[112],"addition,":[114],"our":[115,142],"combines":[117],"complementary":[119],"losses":[120],"levels:":[123],"1)":[124],"semantic":[125],"level,":[126,129],"2)":[127],"distributional":[128],"3)":[131],"pairwise":[132],"level":[133],"improving":[135],"performance.":[138],"extensive":[140],"experiments,":[141],"method":[143],"outperforms":[144],"many":[145],"other":[146],"methods,":[149],"proves":[151],"efficacy":[153]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
