{"id":"https://openalex.org/W7139910936","doi":"https://doi.org/10.1016/j.procs.2026.01.017","title":"Contrastive Multimodal Models for Zero-Shot Cross-Domain Vision-Language Retrieval","display_name":"Contrastive Multimodal Models for Zero-Shot Cross-Domain Vision-Language Retrieval","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W7139910936","doi":"https://doi.org/10.1016/j.procs.2026.01.017"},"language":"en","primary_location":{"id":"doi:10.1016/j.procs.2026.01.017","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.procs.2026.01.017","pdf_url":null,"source":{"id":"https://openalex.org/S120348307","display_name":"Procedia Computer Science","issn_l":"1877-0509","issn":["1877-0509"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Procedia Computer Science","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1016/j.procs.2026.01.017","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130220400","display_name":"Omprakash Dewangan","orcid":null},"institutions":[{"id":"https://openalex.org/I2800614057","display_name":"Kalinga University","ror":"https://ror.org/03afg5j45","country_code":"IN","type":"education","lineage":["https://openalex.org/I2800614057"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Omprakash Dewangan","raw_affiliation_strings":["Kalinga University, Naya Raipur, Chhattisgarh, India"],"affiliations":[{"raw_affiliation_string":"Kalinga University, Naya Raipur, Chhattisgarh, India","institution_ids":["https://openalex.org/I2800614057"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5130249971","display_name":"Aakansha Soy","orcid":null},"institutions":[{"id":"https://openalex.org/I2800614057","display_name":"Kalinga University","ror":"https://ror.org/03afg5j45","country_code":"IN","type":"education","lineage":["https://openalex.org/I2800614057"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Aakansha Soy","raw_affiliation_strings":["Kalinga University, Naya Raipur, Chhattisgarh, India"],"affiliations":[{"raw_affiliation_string":"Kalinga University, Naya Raipur, Chhattisgarh, India","institution_ids":["https://openalex.org/I2800614057"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5130220400"],"corresponding_institution_ids":["https://openalex.org/I2800614057"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.94871418,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":"275","issue":null,"first_page":"132","last_page":"139"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9634000062942505,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9634000062942505,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.007600000128149986,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.0044999998062849045,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.2321999967098236},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.2167000025510788},{"id":"https://openalex.org/keywords/video-retrieval","display_name":"Video retrieval","score":0.21050000190734863},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.2092999964952469}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.930400013923645},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5778999924659729},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5098000168800354},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.30469998717308044},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.24400000274181366},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2321999967098236},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.21729999780654907},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2167000025510788},{"id":"https://openalex.org/C2983174267","wikidata":"https://www.wikidata.org/wiki/Q3775098","display_name":"Video retrieval","level":2,"score":0.21050000190734863},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.2092999964952469}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1016/j.procs.2026.01.017","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.procs.2026.01.017","pdf_url":null,"source":{"id":"https://openalex.org/S120348307","display_name":"Procedia Computer Science","issn_l":"1877-0509","issn":["1877-0509"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Procedia Computer Science","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1016/j.procs.2026.01.017","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.procs.2026.01.017","pdf_url":null,"source":{"id":"https://openalex.org/S120348307","display_name":"Procedia Computer Science","issn_l":"1877-0509","issn":["1877-0509"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Procedia Computer Science","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.45487380027770996,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W4318215108","https://openalex.org/W4385067602","https://openalex.org/W4386982033","https://openalex.org/W4387059617","https://openalex.org/W4387221236","https://openalex.org/W4388494745","https://openalex.org/W4390231631","https://openalex.org/W4390506533","https://openalex.org/W4390686696","https://openalex.org/W4390974150","https://openalex.org/W4390974852","https://openalex.org/W4390974969","https://openalex.org/W4391545757","https://openalex.org/W4391843341","https://openalex.org/W4392234077","https://openalex.org/W4392902463","https://openalex.org/W4393618909","https://openalex.org/W4394850884","https://openalex.org/W4398198142","https://openalex.org/W4400205824","https://openalex.org/W4401453382","https://openalex.org/W4401580665","https://openalex.org/W4402274395","https://openalex.org/W4403446022","https://openalex.org/W4404085416","https://openalex.org/W4404284060","https://openalex.org/W4404300986","https://openalex.org/W4405271521","https://openalex.org/W4406523923","https://openalex.org/W4408781028","https://openalex.org/W4410382551","https://openalex.org/W4410428252","https://openalex.org/W4410616178","https://openalex.org/W4410639293","https://openalex.org/W4410949762","https://openalex.org/W4416288646"],"related_works":[],"abstract_inverted_index":{"Contrastive":[0,83],"Multimodal":[1,84],"Transformers":[2],"for":[3,191],"Zero-Shot":[4],"Cross-Domain":[5],"Vision-Language":[6],"Retrieval":[7],"Tasks":[8],"focus":[9],"on":[10,22],"aligning":[11],"heterogeneous":[12],"modalities":[13,121],"to":[14,50,71,111,179],"retrieve":[15],"semantically":[16,154],"related":[17],"content":[18],"without":[19],"explicit":[20],"training":[21,54,165],"the":[23,29,82,159,164],"target":[24,160],"domain.":[25,166],"This":[26,115],"paradigm":[27],"enhances":[28],"adaptability":[30],"of":[31,151],"retrieval":[32,150,176],"models":[33],"across":[34,120],"diverse":[35],"visual":[36,66],"and":[37,55,67,143,153,182],"textual":[38,68],"datasets.":[39],"However,":[40],"existing":[41,173],"methods":[42],"often":[43],"suffer":[44],"from":[45,106,163],"weak":[46],"cross-domain":[47,192],"generalization":[48],"due":[49],"distributional":[51],"shifts":[52],"between":[53],"unseen":[56],"domains.":[57],"They":[58],"also":[59],"struggle":[60],"with":[61,86,95],"limited":[62],"alignment":[63,119],"quality,":[64],"as":[65,136,187],"embeddings":[69],"fail":[70],"capture":[72],"domain-specific":[73],"semantics":[74],"effectively.":[75],"To":[76],"address":[77],"these":[78],"challenges,":[79],"we":[80],"propose":[81],"Transformer":[85],"Domain-Adaptive":[87],"Pretraining":[88],"(CMT-DAP).":[89],"The":[90,126],"framework":[91],"integrates":[92],"multimodal":[93,193],"transformers":[94],"a":[96,188],"domain-adaptive":[97],"contrastive":[98],"learning":[99],"stage,":[100],"where":[101],"large-scale":[102],"unlabeled":[103],"image\u2013text":[104],"pairs":[105],"multiple":[107],"domains":[108],"are":[109],"leveraged":[110],"learn":[112],"invariant":[113],"embeddings.":[114],"ensures":[116],"robust":[117],"semantic":[118,183],"while":[122],"improving":[123],"zero-shot":[124],"generalization.":[125],"proposed":[127],"method":[128],"can":[129],"be":[130],"effectively":[131],"applied":[132],"in":[133,175],"areas":[134],"such":[135],"medical":[137],"image\u2013report":[138],"retrieval,":[139],"cross-lingual":[140],"multimedia":[141],"search,":[142],"e-commerce":[144],"product\u2013review":[145],"alignment.":[146],"Specifically,":[147],"it":[148,186],"enables":[149],"accurate":[152],"relevant":[155],"results":[156],"even":[157],"when":[158],"domain":[161,180],"differs":[162],"Experimental":[167],"findings":[168],"demonstrate":[169],"that":[170],"CMT-DAP":[171],"outperforms":[172],"approaches":[174],"accuracy,":[177],"robustness":[178],"shifts,":[181],"consistency,":[184],"establishing":[185],"promising":[189],"solution":[190],"applications.":[194]},"counts_by_year":[],"updated_date":"2026-03-22T06:25:25.174409","created_date":"2026-03-21T00:00:00"}
