{"id":"https://openalex.org/W2954672622","doi":"https://doi.org/10.1145/3331184.3331213","title":"Scalable Deep Multimodal Learning for Cross-Modal Retrieval","display_name":"Scalable Deep Multimodal Learning for Cross-Modal Retrieval","publication_year":2019,"publication_date":"2019-07-18","ids":{"openalex":"https://openalex.org/W2954672622","doi":"https://doi.org/10.1145/3331184.3331213","mag":"2954672622"},"language":"en","primary_location":{"id":"doi:10.1145/3331184.3331213","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3331184.3331213","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 42nd International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082233466","display_name":"Peng Hu","orcid":"https://orcid.org/0000-0003-3868-3997"},"institutions":[{"id":"https://openalex.org/I24185976","display_name":"Sichuan University","ror":"https://ror.org/011ashp19","country_code":"CN","type":"education","lineage":["https://openalex.org/I24185976"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Peng Hu","raw_affiliation_strings":["Sichuan University, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"Sichuan University, Chengdu, China","institution_ids":["https://openalex.org/I24185976"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086517854","display_name":"Liangli Zhen","orcid":"https://orcid.org/0000-0003-0481-3298"},"institutions":[{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Liangli Zhen","raw_affiliation_strings":["Agency for Science, Technology and Research, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Agency for Science, Technology and Research, Singapore, Singapore","institution_ids":["https://openalex.org/I115228651"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027770821","display_name":"Dezhong Peng","orcid":"https://orcid.org/0000-0002-0987-8472"},"institutions":[{"id":"https://openalex.org/I24185976","display_name":"Sichuan University","ror":"https://ror.org/011ashp19","country_code":"CN","type":"education","lineage":["https://openalex.org/I24185976"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dezhong Peng","raw_affiliation_strings":["Sichuan University, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"Sichuan University, Chengdu, China","institution_ids":["https://openalex.org/I24185976"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5006746040","display_name":"Pei Liu","orcid":"https://orcid.org/0000-0003-4954-6736"},"institutions":[{"id":"https://openalex.org/I24185976","display_name":"Sichuan University","ror":"https://ror.org/011ashp19","country_code":"CN","type":"education","lineage":["https://openalex.org/I24185976"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pei Liu","raw_affiliation_strings":["Sichuan University, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"Sichuan University, Chengdu, China","institution_ids":["https://openalex.org/I24185976"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5082233466"],"corresponding_institution_ids":["https://openalex.org/I24185976"],"apc_list":null,"apc_paid":null,"fwci":5.6684,"has_fulltext":false,"cited_by_count":130,"citation_normalized_percentile":{"value":0.96798927,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"635","last_page":"644"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9884999990463257,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8137589693069458},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.7447795867919922},{"id":"https://openalex.org/keywords/subspace-topology","display_name":"Subspace topology","score":0.6971526145935059},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6713232398033142},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6333741545677185},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.6314058899879456},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.611903190612793},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.6082571148872375},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.4996926784515381},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4838506281375885},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.45361968874931335},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.35812437534332275},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.0612298846244812}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8137589693069458},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.7447795867919922},{"id":"https://openalex.org/C32834561","wikidata":"https://www.wikidata.org/wiki/Q660730","display_name":"Subspace topology","level":2,"score":0.6971526145935059},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6713232398033142},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6333741545677185},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.6314058899879456},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.611903190612793},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.6082571148872375},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.4996926784515381},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4838506281375885},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.45361968874931335},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.35812437534332275},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.0612298846244812},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3331184.3331213","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3331184.3331213","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 42nd International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":46,"referenced_works":["https://openalex.org/W1497960466","https://openalex.org/W1522301498","https://openalex.org/W1522734439","https://openalex.org/W1523385540","https://openalex.org/W1665214252","https://openalex.org/W1686810756","https://openalex.org/W1883346539","https://openalex.org/W1964073652","https://openalex.org/W2007972815","https://openalex.org/W2013535308","https://openalex.org/W2016053056","https://openalex.org/W2021122545","https://openalex.org/W2025341678","https://openalex.org/W2030899956","https://openalex.org/W2052727801","https://openalex.org/W2071207147","https://openalex.org/W2096663965","https://openalex.org/W2106277773","https://openalex.org/W2186500555","https://openalex.org/W2210322478","https://openalex.org/W2232201864","https://openalex.org/W2428757121","https://openalex.org/W2476034201","https://openalex.org/W2560674852","https://openalex.org/W2564325886","https://openalex.org/W2571899125","https://openalex.org/W2574447816","https://openalex.org/W2602753196","https://openalex.org/W2605649771","https://openalex.org/W2606965845","https://openalex.org/W2618530766","https://openalex.org/W2725249286","https://openalex.org/W2765440071","https://openalex.org/W2793730104","https://openalex.org/W2795832645","https://openalex.org/W2799787995","https://openalex.org/W2807722546","https://openalex.org/W2809034148","https://openalex.org/W2890732907","https://openalex.org/W2894786240","https://openalex.org/W2919115771","https://openalex.org/W2942919589","https://openalex.org/W2963514026","https://openalex.org/W2964216321","https://openalex.org/W2967957126","https://openalex.org/W4241614188"],"related_works":["https://openalex.org/W73545470","https://openalex.org/W4224266612","https://openalex.org/W2383394264","https://openalex.org/W4320153225","https://openalex.org/W4293261942","https://openalex.org/W3125968744","https://openalex.org/W203959209","https://openalex.org/W2110287964","https://openalex.org/W2167701463","https://openalex.org/W4307407935"],"abstract_inverted_index":{"Cross-modal":[0],"retrieval":[1,21,86],"takes":[2],"one":[3,176],"type":[4],"of":[5,14,18,55,144,163,168,177,185,189],"data":[6,13,36,132,184],"as":[7],"the":[8,35,45,52,65,103,109,130,134,145,161,166,171,178,206,218],"query":[9],"to":[10,25,41,69,96,128,138,160,181],"retrieve":[11],"relevant":[12],"another":[15],"type.":[16],"Most":[17],"existing":[19,146],"cross-modal":[20,85,222],"approaches":[22],"were":[23],"proposed":[24,172,207],"learn":[26],"a":[27,31,83,98,192],"common":[28,99,136,194],"subspace":[29,137],"in":[30,101,213,221],"joint":[32],"manner,":[33],"where":[34],"from":[37,75],"all":[38],"modalities":[39,122,190],"have":[40],"be":[42,70,158,175],"involved":[43],"during":[44],"whole":[46,66],"training":[47],"process.":[48],"For":[49],"these":[50],"approaches,":[51],"optimal":[53],"parameters":[54],"different":[56,152],"modality-specific":[57,118,153],"transformations":[58],"are":[59],"dependent":[60],"on":[61,199],"each":[62,126],"other":[63],"and":[64,156,211,216],"model":[67],"has":[68],"retrained":[71],"when":[72],"handling":[73],"samples":[74],"new":[76],"modalities.":[77,164],"In":[78],"this":[79],"paper,":[80],"we":[81],"present":[82],"novel":[84],"method,":[87],"called":[88],"Scalable":[89],"Deep":[90],"Multimodal":[91],"Learning":[92],"(SDML).":[93],"It":[94],"proposes":[95],"predefine":[97],"subspace,":[100],"which":[102],"between-class":[104],"variation":[105,111],"is":[106,112,209],"maximized":[107],"while":[108],"within-class":[110],"minimized.":[113],"Then,":[114],"it":[115],"trains":[116],"m":[117,121],"networks":[119,154],"for":[120,125],"(one":[123],"network":[124],"modality)":[127],"transform":[129],"multimodal":[131,140,214],"into":[133,191],"predefined":[135,193],"achieve":[139],"learning.":[141],"Unlike":[142],"many":[143],"methods,":[147],"our":[148,169],"method":[149,208],"can":[150],"train":[151],"independently":[155,182],"thus":[157],"scalable":[159],"number":[162,188],"To":[165],"best":[167],"knowledge,":[170],"SDML":[173],"could":[174],"first":[179],"works":[180],"project":[183],"an":[186],"unfixed":[187],"subspace.":[195],"Comprehensive":[196],"experimental":[197],"results":[198],"four":[200],"widely-used":[201],"benchmark":[202],"datasets":[203],"demonstrate":[204],"that":[205],"effective":[210],"efficient":[212],"learning":[215],"outperforms":[217],"state-of-the-art":[219],"methods":[220],"retrieval.":[223]},"counts_by_year":[{"year":2025,"cited_by_count":23},{"year":2024,"cited_by_count":30},{"year":2023,"cited_by_count":21},{"year":2022,"cited_by_count":17},{"year":2021,"cited_by_count":24},{"year":2020,"cited_by_count":13},{"year":2019,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
