{"id":"https://openalex.org/W4387892109","doi":"https://doi.org/10.48550/arxiv.2310.13499","title":"DistillCSE: Distilled Contrastive Learning for Sentence Embeddings","display_name":"DistillCSE: Distilled Contrastive Learning for Sentence Embeddings","publication_year":2023,"publication_date":"2023-10-20","ids":{"openalex":"https://openalex.org/W4387892109","doi":"https://doi.org/10.48550/arxiv.2310.13499"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2310.13499","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2310.13499","pdf_url":"https://arxiv.org/pdf/2310.13499","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2310.13499","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101622577","display_name":"Jiahao Xu","orcid":"https://orcid.org/0000-0001-6776-8215"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Xu, Jiahao","raw_affiliation_strings":["Nanyang Technological University,"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University,","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078512276","display_name":"Wei Shao","orcid":"https://orcid.org/0000-0001-7531-1055"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Shao, Wei","raw_affiliation_strings":["City Univeristy of Hong Kong,"],"affiliations":[{"raw_affiliation_string":"City Univeristy of Hong Kong,","institution_ids":["https://openalex.org/I168719708"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100718988","display_name":"Lihui Chen","orcid":"https://orcid.org/0000-0001-7452-1675"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Chen, Lihui","raw_affiliation_strings":["Nanyang Technological University,"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University,","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082918628","display_name":"Lemao Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liu, Lemao","raw_affiliation_strings":["Tencent AI Lab"],"affiliations":[{"raw_affiliation_string":"Tencent AI Lab","institution_ids":["https://openalex.org/I2250653659"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101622577"],"corresponding_institution_ids":["https://openalex.org/I172675005"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/overfitting","display_name":"Overfitting","score":0.7661457061767578},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7020389437675476},{"id":"https://openalex.org/keywords/distillation","display_name":"Distillation","score":0.6822665929794312},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5602049231529236},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5353385210037231},{"id":"https://openalex.org/keywords/variance","display_name":"Variance (accounting)","score":0.5077112913131714},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.5045038461685181},{"id":"https://openalex.org/keywords/shuffling","display_name":"Shuffling","score":0.4479823708534241},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4309152066707611},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.42460232973098755},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.42345499992370605},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3565407395362854},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.19195201992988586},{"id":"https://openalex.org/keywords/chromatography","display_name":"Chromatography","score":0.10022169351577759},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.09614214301109314},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.0924321711063385}],"concepts":[{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.7661457061767578},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7020389437675476},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.6822665929794312},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5602049231529236},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5353385210037231},{"id":"https://openalex.org/C196083921","wikidata":"https://www.wikidata.org/wiki/Q7915758","display_name":"Variance (accounting)","level":2,"score":0.5077112913131714},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.5045038461685181},{"id":"https://openalex.org/C167927819","wikidata":"https://www.wikidata.org/wiki/Q1930567","display_name":"Shuffling","level":2,"score":0.4479823708534241},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4309152066707611},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.42460232973098755},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.42345499992370605},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3565407395362854},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.19195201992988586},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.10022169351577759},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.09614214301109314},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0924321711063385},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C121955636","wikidata":"https://www.wikidata.org/wiki/Q4116214","display_name":"Accounting","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2310.13499","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2310.13499","pdf_url":"https://arxiv.org/pdf/2310.13499","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2310.13499","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2310.13499","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2310.13499","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2310.13499","pdf_url":"https://arxiv.org/pdf/2310.13499","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.8600000143051147}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4387892109.pdf"},"referenced_works_count":62,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1537220347","https://openalex.org/W1614298861","https://openalex.org/W1821462560","https://openalex.org/W2095705004","https://openalex.org/W2101210369","https://openalex.org/W2111316763","https://openalex.org/W2250539671","https://openalex.org/W2250790822","https://openalex.org/W2251861449","https://openalex.org/W2605035112","https://openalex.org/W2740743644","https://openalex.org/W2762022354","https://openalex.org/W2785325870","https://openalex.org/W2786464815","https://openalex.org/W2798991696","https://openalex.org/W2890179025","https://openalex.org/W2908920565","https://openalex.org/W2952650870","https://openalex.org/W2963341956","https://openalex.org/W2963804993","https://openalex.org/W2965373594","https://openalex.org/W2969985801","https://openalex.org/W2970641574","https://openalex.org/W2979826702","https://openalex.org/W2986380433","https://openalex.org/W3005680577","https://openalex.org/W3099700870","https://openalex.org/W3100652389","https://openalex.org/W3104033643","https://openalex.org/W3105816068","https://openalex.org/W3105928338","https://openalex.org/W3105966348","https://openalex.org/W3108655343","https://openalex.org/W3113303810","https://openalex.org/W3115295967","https://openalex.org/W3122838366","https://openalex.org/W3154229486","https://openalex.org/W3156636935","https://openalex.org/W3172871932","https://openalex.org/W3173169192","https://openalex.org/W3173190788","https://openalex.org/W3173449346","https://openalex.org/W3173783447","https://openalex.org/W3175362188","https://openalex.org/W3176047188","https://openalex.org/W3189117283","https://openalex.org/W3199926081","https://openalex.org/W4200635076","https://openalex.org/W4206294441","https://openalex.org/W4224313754","https://openalex.org/W4225385501","https://openalex.org/W4249573750","https://openalex.org/W4285296644","https://openalex.org/W4286891192","https://openalex.org/W4287824654","https://openalex.org/W4288112596","https://openalex.org/W4294068600","https://openalex.org/W4297808394","https://openalex.org/W4385570895","https://openalex.org/W4385573552","https://openalex.org/W4389520386"],"related_works":["https://openalex.org/W4362597605","https://openalex.org/W1574414179","https://openalex.org/W4213239787","https://openalex.org/W4297676672","https://openalex.org/W3009056573","https://openalex.org/W2045745654","https://openalex.org/W2114337652","https://openalex.org/W4377131110","https://openalex.org/W4388857216","https://openalex.org/W2922073769"],"abstract_inverted_index":{"This":[0],"paper":[1,101],"proposes":[2],"the":[3,11,45,49,68,71,81,87,94,121,134],"DistillCSE":[4,21,47,136],"framework,":[5],"which":[6],"performs":[7],"contrastive":[8,90],"learning":[9],"under":[10],"self-training":[12],"paradigm":[13],"with":[14],"knowledge":[15,42,53,73,110],"distillation.":[16,43],"The":[17,63],"potential":[18],"advantage":[19],"of":[20,52,80,89],"is":[22],"its":[23],"self-enhancing":[24],"feature:":[25],"using":[26],"a":[27,35,76,112,144],"base":[28],"model":[29,37],"to":[30,60,86],"provide":[31],"additional":[32],"supervision":[33],"signals,":[34],"stronger":[36],"may":[38],"be":[39],"learned":[40],"through":[41,48],"However,":[44],"vanilla":[46],"standard":[50,72,130],"implementation":[51],"distillation":[54,74],"only":[55],"achieves":[56],"marginal":[57],"improvements":[58],"due":[59,85],"severe":[61],"overfitting.":[62],"further":[64],"quantitative":[65],"analyses":[66],"demonstrate":[67,132],"reason":[69],"that":[70,133],"exhibits":[75],"relatively":[77],"large":[78],"variance":[79],"teacher":[82,126],"model's":[83],"logits":[84,123],"essence":[88],"learning.":[91],"To":[92],"mitigate":[93],"issue":[95],"induced":[96],"by":[97],"high":[98],"variance,":[99],"this":[100],"accordingly":[102],"proposed":[103,135],"two":[104],"simple":[105],"yet":[106],"effective":[107],"solutions":[108],"for":[109],"distillation:":[111],"Group-P":[113],"shuffling":[114],"strategy":[115],"as":[116],"an":[117],"implicit":[118],"regularization":[119],"and":[120,142],"averaging":[122],"from":[124],"multiple":[125],"components.":[127],"Experiments":[128],"on":[129],"benchmarks":[131],"outperforms":[137],"many":[138],"strong":[139],"baseline":[140],"methods":[141],"yields":[143],"new":[145],"state-of-the-art":[146],"performance.":[147]},"counts_by_year":[],"updated_date":"2026-03-13T16:22:10.518609","created_date":"2025-10-10T00:00:00"}
