{"id":"https://openalex.org/W7134840473","doi":"https://doi.org/10.48550/arxiv.2603.08202","title":"MM-TS: Multi-Modal Temperature and Margin Schedules for Contrastive Learning with Long-Tail Data","display_name":"MM-TS: Multi-Modal Temperature and Margin Schedules for Contrastive Learning with Long-Tail Data","publication_year":2026,"publication_date":"2026-03-09","ids":{"openalex":"https://openalex.org/W7134840473","doi":"https://doi.org/10.48550/arxiv.2603.08202"},"language":"en","primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.08202","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128661012","display_name":"Siarhei Sheludzko","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sheludzko, Siarhei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128643130","display_name":"Dhimitrios Duka","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Duka, Dhimitrios","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128649452","display_name":"Bernt Schiele","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Schiele, Bernt","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121265388","display_name":"Hilde Kuehne","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kuehne, Hilde","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128661372","display_name":"Anna Kukleva","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kukleva, Anna","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.5077000260353088,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.5077000260353088,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.2678999900817871,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.04230000078678131,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.611299991607666},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.5681999921798706},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.4049000144004822},{"id":"https://openalex.org/keywords/structured-prediction","display_name":"Structured prediction","score":0.3427000045776367},{"id":"https://openalex.org/keywords/temperature-measurement","display_name":"Temperature measurement","score":0.32510000467300415},{"id":"https://openalex.org/keywords/contrastive-analysis","display_name":"Contrastive analysis","score":0.28630000352859497}],"concepts":[{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.611299991607666},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.598800003528595},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.5681999921798706},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5218999981880188},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.4049000144004822},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.3427000045776367},{"id":"https://openalex.org/C72293138","wikidata":"https://www.wikidata.org/wiki/Q909741","display_name":"Temperature measurement","level":2,"score":0.32510000467300415},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3221000134944916},{"id":"https://openalex.org/C2777629044","wikidata":"https://www.wikidata.org/wiki/Q614959","display_name":"Contrastive analysis","level":2,"score":0.28630000352859497},{"id":"https://openalex.org/C117220453","wikidata":"https://www.wikidata.org/wiki/Q5172842","display_name":"Correlation","level":2,"score":0.2766000032424927},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2565000057220459},{"id":"https://openalex.org/C2982736386","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Statistical learning","level":2,"score":0.2531000077724457},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.25200000405311584}],"mesh":[],"locations_count":3,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.08202","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"pmh:oai:pure.mpg.de:item_3697946","is_oa":true,"landing_page_url":"https://hdl.handle.net/21.11116/0000-0012-B3B9-1","pdf_url":null,"source":{"id":"https://openalex.org/S4306400654","display_name":"MPG.PuRe (Max Planck Society)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I149899117","host_organization_name":"Max Planck Society","host_organization_lineage":["https://openalex.org/I149899117"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/workingPaper"},{"id":"doi:10.48550/arxiv.2603.08202","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.08202","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.08202","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"score":0.4280807077884674,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Contrastive":[0],"learning":[1,14],"has":[2,35],"become":[3],"a":[4,125,145],"fundamental":[5],"approach":[6,166],"in":[7,78,90,154,197],"both":[8],"uni-modal":[9,28,65],"and":[10,57,87,160,172,178,180,186,191],"multi-modal":[11,69,92,98,155],"frameworks.":[12],"This":[13],"paradigm":[15],"pulls":[16],"positive":[17],"pairs":[18],"of":[19,40,64,114],"samples":[20,119],"closer":[21],"while":[22],"pushing":[23],"negatives":[24],"apart.":[25],"In":[26,50],"the":[27,38,47,62,76,79,85,91,107,111,150,198],"setting":[29],"(e.g.,":[30],"image-based":[31],"learning),":[32],"previous":[33],"research":[34],"shown":[36],"that":[37,96,137,182],"strength":[39],"these":[41],"forces":[42,89],"can":[43,140],"be":[44,141],"controlled":[45],"through":[46],"temperature":[48,66,77,108,127,138,185],"parameter.":[49],"this":[51],"work,":[52],"we":[53,105,135],"propose":[54],"Multi-Modal":[55],"Temperature":[56],"Margin":[58],"Schedules":[59],"(MM-TS),":[60],"extending":[61],"concept":[63],"scheduling":[67,139],"to":[68,128,193],"contrastive":[70,80,156],"learning.":[71],"Our":[72],"method":[73],"dynamically":[74],"adjusts":[75],"loss":[81,159],"during":[82],"training,":[83],"modulating":[84],"attraction":[86],"repulsion":[88],"setting.":[93],"Additionally,":[94],"recognizing":[95],"standard":[97],"datasets":[99],"often":[100],"follow":[101],"imbalanced,":[102],"long-tail":[103],"distributions,":[104],"adapt":[106],"based":[109],"on":[110,167],"local":[112],"distribution":[113],"each":[115],"training":[116],"sample.":[117],"Specifically,":[118],"from":[120],"dense":[121],"clusters":[122],"are":[123],"assigned":[124],"higher":[126],"better":[129],"preserve":[130],"their":[131],"semantic":[132],"structure.":[133],"Furthermore,":[134],"demonstrate":[136],"effectively":[142],"integrated":[143],"within":[144],"max-margin":[146,161],"framework,":[147],"thereby":[148],"unifying":[149],"two":[151],"predominant":[152],"approaches":[153],"learning:":[157],"InfoNCE":[158],"objective.":[162],"We":[163],"evaluate":[164],"our":[165,183],"four":[168],"widely":[169],"used":[170],"image-":[171],"video-language":[173],"datasets,":[174],"Flickr30K,":[175],"MSCOCO,":[176],"EPIC-KITCHENS-100,":[177],"YouCook2,":[179],"show":[181],"dynamic":[184],"margin":[187],"schedules":[188],"improve":[189],"performance":[190],"lead":[192],"new":[194],"state-of-the-art":[195],"results":[196],"field.":[199]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-11T00:00:00"}
