{"id":"https://openalex.org/W7161430484","doi":"https://doi.org/10.1016/j.neunet.2026.109098","title":"Stochastic approximation to contrastive learning","display_name":"Stochastic approximation to contrastive learning","publication_year":2026,"publication_date":"2026-05-16","ids":{"openalex":"https://openalex.org/W7161430484","doi":"https://doi.org/10.1016/j.neunet.2026.109098","pmid":"https://pubmed.ncbi.nlm.nih.gov/42172975"},"language":"en","primary_location":{"id":"doi:10.1016/j.neunet.2026.109098","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.neunet.2026.109098","pdf_url":null,"source":{"id":"https://openalex.org/S123019304","display_name":"Neural Networks","issn_l":"0893-6080","issn":["0893-6080","1879-2782"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Neural Networks","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1016/j.neunet.2026.109098","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136310828","display_name":"Erland B. Olsson","orcid":null},"institutions":[{"id":"https://openalex.org/I204778367","display_name":"Norwegian University of Science and Technology","ror":"https://ror.org/05xg72x27","country_code":"NO","type":"education","lineage":["https://openalex.org/I204778367"]}],"countries":["NO"],"is_corresponding":true,"raw_author_name":"Erland Brandser Olsson","raw_affiliation_strings":["Department of Computer Science, Norwegian University of Science and Technology, Norway. Electronic address: erland.b.olsson@ntnu.no"],"raw_orcid":"https://orcid.org/0009-0008-0220-6949","affiliations":[{"raw_affiliation_string":"Department of Computer Science, Norwegian University of Science and Technology, Norway. Electronic address: erland.b.olsson@ntnu.no","institution_ids":["https://openalex.org/I204778367"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5080414124","display_name":"Zhirong Yang","orcid":"https://orcid.org/0000-0001-8412-5684"},"institutions":[{"id":"https://openalex.org/I204778367","display_name":"Norwegian University of Science and Technology","ror":"https://ror.org/05xg72x27","country_code":"NO","type":"education","lineage":["https://openalex.org/I204778367"]}],"countries":["NO"],"is_corresponding":true,"raw_author_name":"Zhirong Yang","raw_affiliation_strings":["Department of Computer Science, Norwegian University of Science and Technology, Norway; Jinhua Institute of Zhejiang University, China. Electronic address: zhirong.yang@ntnu.no"],"raw_orcid":"https://orcid.org/0000-0001-8412-5684","affiliations":[{"raw_affiliation_string":"Department of Computer Science, Norwegian University of Science and Technology, Norway; Jinhua Institute of Zhejiang University, China. Electronic address: zhirong.yang@ntnu.no","institution_ids":["https://openalex.org/I204778367"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5080414124","https://openalex.org/A5136310828"],"corresponding_institution_ids":["https://openalex.org/I204778367"],"apc_list":{"value":3350,"currency":"USD","value_usd":3350},"apc_paid":{"value":3350,"currency":"USD","value_usd":3350},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.90259571,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":"203","issue":null,"first_page":"109098","last_page":"109098"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.5304999947547913,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.5304999947547913,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12814","display_name":"Gaussian Processes and Bayesian Inference","score":0.11840000003576279,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.05469999834895134,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.5436999797821045},{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.4948999881744385},{"id":"https://openalex.org/keywords/contrast","display_name":"Contrast (vision)","score":0.4293000102043152},{"id":"https://openalex.org/keywords/stochastic-approximation","display_name":"Stochastic approximation","score":0.40549999475479126},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.36719998717308044},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.3400000035762787},{"id":"https://openalex.org/keywords/online-learning","display_name":"Online learning","score":0.3393999934196472},{"id":"https://openalex.org/keywords/learning-to-learn","display_name":"Learning to learn","score":0.3314000070095062}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6823999881744385},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5827000141143799},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.5436999797821045},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.4948999881744385},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.4293000102043152},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.41530001163482666},{"id":"https://openalex.org/C55479107","wikidata":"https://www.wikidata.org/wiki/Q97663916","display_name":"Stochastic approximation","level":3,"score":0.40549999475479126},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.36719998717308044},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3483000099658966},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.3400000035762787},{"id":"https://openalex.org/C2986087404","wikidata":"https://www.wikidata.org/wiki/Q15946010","display_name":"Online learning","level":2,"score":0.3393999934196472},{"id":"https://openalex.org/C2986563244","wikidata":"https://www.wikidata.org/wiki/Q6822310","display_name":"Learning to learn","level":2,"score":0.3314000070095062},{"id":"https://openalex.org/C2777629044","wikidata":"https://www.wikidata.org/wiki/Q614959","display_name":"Contrastive analysis","level":2,"score":0.3240000009536743},{"id":"https://openalex.org/C106487976","wikidata":"https://www.wikidata.org/wiki/Q685816","display_name":"Matrix (chemical analysis)","level":2,"score":0.3212999999523163},{"id":"https://openalex.org/C148764684","wikidata":"https://www.wikidata.org/wiki/Q621751","display_name":"Approximation algorithm","level":2,"score":0.3188999891281128},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3111000061035156},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.30469998717308044},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3012999892234802},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.2985999882221222},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2849999964237213},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.28369998931884766},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.28040000796318054},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.2777999937534332},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2759999930858612},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.26460000872612},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.2565000057220459},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.25450000166893005}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1016/j.neunet.2026.109098","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.neunet.2026.109098","pdf_url":null,"source":{"id":"https://openalex.org/S123019304","display_name":"Neural Networks","issn_l":"0893-6080","issn":["0893-6080","1879-2782"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Neural Networks","raw_type":"journal-article"},{"id":"pmid:42172975","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/42172975","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Neural networks : the official journal of the International Neural Network Society","raw_type":null}],"best_oa_location":{"id":"doi:10.1016/j.neunet.2026.109098","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.neunet.2026.109098","pdf_url":null,"source":{"id":"https://openalex.org/S123019304","display_name":"Neural Networks","issn_l":"0893-6080","issn":["0893-6080","1879-2782"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Neural Networks","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320311040","display_name":"Norges Teknisk-Naturvitenskapelige Universitet","ror":"https://ror.org/05xg72x27"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W2031489346","https://openalex.org/W2117539524","https://openalex.org/W2163922914","https://openalex.org/W2889326414","https://openalex.org/W2919115771","https://openalex.org/W3023371261","https://openalex.org/W3114632476","https://openalex.org/W4301430669","https://openalex.org/W4311375103","https://openalex.org/W4413859938","https://openalex.org/W7133242605"],"related_works":[],"abstract_inverted_index":{"Contrastive":[0],"learning":[1,70,130,144,198,231],"pulls":[2],"positive":[3,91,113,194],"samples":[4,11,94,125,177,195],"(similar":[5],"examples)":[6,13],"closer":[7],"and":[8,47,53,60,65,92,95,114,217,233],"pushes":[9],"negative":[10,93,115,124,176,204,222],"(dissimilar":[12],"away":[14],"to":[15,22,41,44,107,191,209,239],"learn":[16],"a":[17,28,49,110,138,146,152,214,247],"meaningful":[18],"mapping":[19],"from":[20,38,203],"inputs":[21],"outputs.":[23],"It":[24],"is":[25,161,208,234],"found":[26],"in":[27,33,55,99,236],"broad":[29,51],"range":[30,37],"of":[31,90,155],"applications":[32],"computer":[34],"vision":[35],"that":[36,126,141,171,211],"image":[39],"classification":[40],"object":[42],"detection":[43],"video":[45],"processing":[46],"has":[48],"similarly":[50],"impact":[52],"relevance":[54],"natural":[56],"language":[57],"processing,":[58],"audio":[59],"speech,":[61],"graphs,":[62],"recommendation":[63],"systems,":[64],"multimodal":[66],"learning.":[67],"Although":[68],"contrastive":[69,143,230],"done":[71],"right":[72],"typically":[73],"achieves":[74],"state-of-the-art":[75,241],"results,":[76],"there":[77],"are":[78,96],"major":[79],"limitations":[80],"with":[81,174,196,213],"traditional":[82],"methods":[83],"because":[84],"they":[85,103],"rely":[86],"on":[87,123,243],"arbitrary":[88],"definitions":[89],"not":[97],"decomposable":[98,162],"minibatch":[100],"optimization.":[101],"Thus,":[102],"require":[104],"large":[105],"batchsizes":[106],"effectively":[108],"manage":[109],"tradeoff":[111],"between":[112],"terms.":[116],"This":[117],"approach":[118],"wastes":[119],"significant":[120],"computational":[121,201],"resources":[122],"may":[127],"have":[128],"minimal":[129],"signals.":[131],"To":[132],"address":[133],"these":[134],"limitations,":[135],"we":[136,183],"propose":[137],"novel":[139],"method":[140,227],"reformulates":[142],"as":[145,218,220],"matrix":[147],"approximation":[148,169],"problem":[149],"using":[150],"I-divergence,":[151],"non-normalized":[153],"variant":[154],"Kullback-Leibler":[156],"divergence.":[157],"Our":[158,206],"objective":[159],"function":[160],"across":[163],"instance":[164],"pairs,":[165],"enabling":[166],"efficient":[167],"stochastic":[168],"algorithms":[170],"perform":[172],"well":[173],"fewer":[175],"by":[178],"leveraging":[179],"neighbor":[180],"embeddings.":[181],"Additionally,":[182],"generalize":[184],"the":[185],"scaling":[186],"factor":[187],"beyond":[188],"standard":[189],"normalization":[190],"adaptively":[192],"emphasize":[193],"higher":[197,248],"potential,":[199],"reducing":[200],"waste":[202],"samples.":[205],"ambition":[207],"demonstrate":[210],"even":[212],"low":[215],"batchsize":[216],"few":[219],"one":[221],"term":[223],"per":[224],"image,":[225],"our":[226],"outperforms":[228],"existing":[229],"approaches":[232,242],"competitive":[235],"overall":[237],"performance":[238],"other":[240],"ImageNet":[244],"which":[245],"use":[246],"batchsize.":[249]},"counts_by_year":[],"updated_date":"2026-05-24T06:10:38.904174","created_date":"2026-05-17T00:00:00"}
