{"id":"https://openalex.org/W7160295104","doi":"https://doi.org/10.1109/wacv61042.2026.00712","title":"MM-TS: Multi-Modal Temperature and Margin Schedules for Contrastive Learning with Long-Tail Data","display_name":"MM-TS: Multi-Modal Temperature and Margin Schedules for Contrastive Learning with Long-Tail Data","publication_year":2026,"publication_date":"2026-03-06","ids":{"openalex":"https://openalex.org/W7160295104","doi":"https://doi.org/10.1109/wacv61042.2026.00712"},"language":null,"primary_location":{"id":"doi:10.1109/wacv61042.2026.00712","is_oa":false,"landing_page_url":"https://doi.org/10.1109/wacv61042.2026.00712","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE/CVF Winter Conference on Applications of Computer Vision (WACV)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128661012","display_name":"Siarhei Sheludzko","orcid":null},"institutions":[{"id":"https://openalex.org/I135140700","display_name":"University of Bonn","ror":"https://ror.org/041nas322","country_code":"DE","type":"education","lineage":["https://openalex.org/I135140700"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Siarhei Sheludzko","raw_affiliation_strings":["University of Bonn"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Bonn","institution_ids":["https://openalex.org/I135140700"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128643130","display_name":"Dhimitrios Duka","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dhimitrios Duka","raw_affiliation_strings":["MPI for Informatics, SIC"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"MPI for Informatics, SIC","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135337873","display_name":"Bernt Schiele","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bernt Schiele","raw_affiliation_strings":["MPI for Informatics, SIC"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"MPI for Informatics, SIC","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011795407","display_name":"Hilde Kuehne","orcid":"https://orcid.org/0000-0003-1079-4441"},"institutions":[{"id":"https://openalex.org/I8087733","display_name":"University of T\u00fcbingen","ror":"https://ror.org/03a1kwz48","country_code":"DE","type":"education","lineage":["https://openalex.org/I8087733"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Hilde Kuehne","raw_affiliation_strings":["Tuebingen AI Center/University of Tuebingen"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tuebingen AI Center/University of Tuebingen","institution_ids":["https://openalex.org/I8087733"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5135358371","display_name":"Anna Kukleva","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Anna Kukleva","raw_affiliation_strings":["MPI for Informatics, SIC"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"MPI for Informatics, SIC","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.69049038,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"7376","last_page":"7386"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.11490000039339066,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.11490000039339066,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10057","display_name":"Face and Expression Recognition","score":0.10649999976158142,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.1023000031709671,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.5232999920845032},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.3515999913215637},{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.2687000036239624},{"id":"https://openalex.org/keywords/statistical-learning","display_name":"Statistical learning","score":0.2630999982357025},{"id":"https://openalex.org/keywords/term","display_name":"Term (time)","score":0.26170000433921814}],"concepts":[{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.5232999920845032},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5231000185012817},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5127000212669373},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.3515999913215637},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3138999938964844},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.27160000801086426},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2711000144481659},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.2687000036239624},{"id":"https://openalex.org/C2982736386","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Statistical learning","level":2,"score":0.2630999982357025},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.26170000433921814},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.25760000944137573},{"id":"https://openalex.org/C2777629044","wikidata":"https://www.wikidata.org/wiki/Q614959","display_name":"Contrastive analysis","level":2,"score":0.25679999589920044},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.25619998574256897},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.25459998846054077}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/wacv61042.2026.00712","is_oa":false,"landing_page_url":"https://doi.org/10.1109/wacv61042.2026.00712","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 IEEE/CVF Winter Conference on Applications of Computer Vision (WACV)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W1773149199","https://openalex.org/W1861492603","https://openalex.org/W1905882502","https://openalex.org/W2096733369","https://openalex.org/W2132791018","https://openalex.org/W2625366777","https://openalex.org/W2886641317","https://openalex.org/W2952132648","https://openalex.org/W2963691377","https://openalex.org/W2970641574","https://openalex.org/W3035524453","https://openalex.org/W3175593095","https://openalex.org/W3176481196","https://openalex.org/W3202232857","https://openalex.org/W3207758636","https://openalex.org/W3214803981","https://openalex.org/W4285228357","https://openalex.org/W4312477916","https://openalex.org/W4382458595","https://openalex.org/W4386065512","https://openalex.org/W4386075721","https://openalex.org/W4386076314","https://openalex.org/W4386113244","https://openalex.org/W4390873312","https://openalex.org/W4390874449","https://openalex.org/W4402671548","https://openalex.org/W4402727764","https://openalex.org/W4403760819","https://openalex.org/W7133188694","https://openalex.org/W7133193597","https://openalex.org/W7133208266","https://openalex.org/W7133213514"],"related_works":[],"abstract_inverted_index":{"Contrastive":[0],"learning":[1,14],"has":[2,35],"become":[3],"a":[4,125,145],"fundamental":[5],"approach":[6,166],"in":[7,78,90,154,196],"both":[8],"uni-modal":[9,28,65],"and":[10,57,87,160,177,179,185,190],"multi-modal":[11,69,92,98,155],"frameworks.":[12],"This":[13],"paradigm":[15],"pulls":[16],"positive":[17],"pairs":[18],"of":[19,40,64,114],"samples":[20,119],"closer":[21],"while":[22],"pushing":[23],"negatives":[24],"apart.":[25],"In":[26,50],"the":[27,38,47,62,76,79,85,91,107,111,150,197],"setting":[29],"(e.g.,":[30],"image-based":[31],"learning),":[32],"previous":[33],"research":[34],"shown":[36],"that":[37,96,137,181],"strength":[39],"these":[41],"forces":[42,89],"can":[43,140],"be":[44,141],"controlled":[45],"through":[46],"temperature":[48,66,77,108,127,138,184],"parameter.":[49],"this":[51],"work,":[52],"we":[53,105,135],"propose":[54],"Multi-Modal":[55],"Temperature":[56],"Margin":[58],"Schedules":[59],"(MM-TS),":[60],"extending":[61],"concept":[63],"scheduling":[67,139],"to":[68,128,192],"contrastive":[70,80,156],"learning.":[71],"Our":[72],"method":[73],"dynamically":[74],"adjusts":[75],"loss":[81,159],"during":[82],"training,":[83],"modulating":[84],"attraction":[86],"repulsion":[88],"setting.":[93],"Additionally,":[94],"recognizing":[95],"standard":[97],"datasets":[99],"often":[100],"follow":[101],"imbalanced,":[102],"long-tail":[103],"distributions,":[104],"adapt":[106],"based":[109],"on":[110,167],"local":[112],"distribution":[113],"each":[115],"training":[116],"sample.":[117],"Specifically,":[118],"from":[120],"dense":[121],"clusters":[122],"are":[123],"assigned":[124],"higher":[126],"better":[129],"preserve":[130],"their":[131],"semantic":[132],"structure.":[133],"Furthermore,":[134],"demonstrate":[136],"effectively":[142],"integrated":[143],"within":[144],"max-margin":[146,161],"framework,":[147],"thereby":[148],"unifying":[149],"two":[151],"predominant":[152],"approaches":[153],"learning:":[157],"InfoNCE":[158],"objective.":[162],"We":[163],"evaluate":[164],"our":[165,182],"four":[168],"widely":[169],"used":[170],"image-and":[171],"video-language":[172],"datasets,":[173],"Flickr30K,":[174],"MSCOCO,":[175],"EPIC-KITCHENS-100,":[176],"YouCook2,":[178],"show":[180],"dynamic":[183],"margin":[186],"schedules":[187],"improve":[188],"performance":[189],"lead":[191],"new":[193],"state-of-the-art":[194],"results":[195],"field.<sup":[198],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[199],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">1</sup>":[200]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-06T00:00:00"}
