{"id":"https://openalex.org/W4386025837","doi":"https://doi.org/10.1109/access.2023.3306805","title":"Solving Data Imbalance in Text Classification With Constructing Contrastive Samples","display_name":"Solving Data Imbalance in Text Classification With Constructing Contrastive Samples","publication_year":2023,"publication_date":"2023-01-01","ids":{"openalex":"https://openalex.org/W4386025837","doi":"https://doi.org/10.1109/access.2023.3306805"},"language":"en","primary_location":{"id":"doi:10.1109/access.2023.3306805","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2023.3306805","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/10225302.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/10225302.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5092063564","display_name":"Xi Chen","orcid":"https://orcid.org/0000-0003-1381-7637"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xi Chen","raw_affiliation_strings":["Advanced Institution of Information Technology, Peking University, Hangzhou, China","Advanced Institution of Information Technology Peking University, No.233, Yonghui Rd, Hangzhou, Zhejiang, China"],"affiliations":[{"raw_affiliation_string":"Advanced Institution of Information Technology, Peking University, Hangzhou, China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"Advanced Institution of Information Technology Peking University, No.233, Yonghui Rd, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101555891","display_name":"Wei Zhang","orcid":"https://orcid.org/0009-0001-5847-663X"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Zhang","raw_affiliation_strings":["Advanced Institution of Information Technology, Peking University, Hangzhou, China","Advanced Institution of Information Technology Peking University, No.233, Yonghui Rd, Hangzhou, Zhejiang, China"],"affiliations":[{"raw_affiliation_string":"Advanced Institution of Information Technology, Peking University, Hangzhou, China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"Advanced Institution of Information Technology Peking University, No.233, Yonghui Rd, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101850182","display_name":"Shuai Pan","orcid":"https://orcid.org/0000-0002-8195-7087"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuai Pan","raw_affiliation_strings":["Advanced Institution of Information Technology, Peking University, Hangzhou, China","Advanced Institution of Information Technology Peking University, No.233, Yonghui Rd, Hangzhou, Zhejiang, China"],"affiliations":[{"raw_affiliation_string":"Advanced Institution of Information Technology, Peking University, Hangzhou, China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"Advanced Institution of Information Technology Peking University, No.233, Yonghui Rd, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101464065","display_name":"Jiayin Chen","orcid":"https://orcid.org/0000-0003-0452-2618"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiayin Chen","raw_affiliation_strings":["Advanced Institution of Information Technology, Peking University, Hangzhou, China","Advanced Institution of Information Technology Peking University, No.233, Yonghui Rd, Hangzhou, Zhejiang, China"],"affiliations":[{"raw_affiliation_string":"Advanced Institution of Information Technology, Peking University, Hangzhou, China","institution_ids":["https://openalex.org/I20231570"]},{"raw_affiliation_string":"Advanced Institution of Information Technology Peking University, No.233, Yonghui Rd, Hangzhou, Zhejiang, China","institution_ids":["https://openalex.org/I20231570"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5092063564"],"corresponding_institution_ids":["https://openalex.org/I20231570"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":1.0438,"has_fulltext":true,"cited_by_count":6,"citation_normalized_percentile":{"value":0.81366528,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":"11","issue":null,"first_page":"90554","last_page":"90562"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7212848663330078},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4782387614250183},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.46971362829208374}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7212848663330078},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4782387614250183},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.46971362829208374}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2023.3306805","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2023.3306805","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/10225302.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:5c96aa58ba084e5fa1400fae0ceb4c81","is_oa":true,"landing_page_url":"https://doaj.org/article/5c96aa58ba084e5fa1400fae0ceb4c81","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 11, Pp 90554-90562 (2023)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2023.3306805","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2023.3306805","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/10225302.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.8299999833106995}],"awards":[{"id":"https://openalex.org/G8801657146","display_name":null,"funder_award_id":"2022YFF0903302","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4386025837.pdf","grobid_xml":"https://content.openalex.org/works/W4386025837.grobid-xml"},"referenced_works_count":46,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1832693441","https://openalex.org/W2137165876","https://openalex.org/W2138621090","https://openalex.org/W2148143831","https://openalex.org/W2154359981","https://openalex.org/W2170240176","https://openalex.org/W2250966211","https://openalex.org/W2490420619","https://openalex.org/W2740721704","https://openalex.org/W2842511635","https://openalex.org/W2896457183","https://openalex.org/W2944828972","https://openalex.org/W2962984188","https://openalex.org/W2963626623","https://openalex.org/W2965373594","https://openalex.org/W3005680577","https://openalex.org/W3009157386","https://openalex.org/W3026732421","https://openalex.org/W3034693764","https://openalex.org/W3034781633","https://openalex.org/W3035524453","https://openalex.org/W3090114880","https://openalex.org/W3108655343","https://openalex.org/W3115295967","https://openalex.org/W3156636935","https://openalex.org/W3167214817","https://openalex.org/W3174242940","https://openalex.org/W3176624977","https://openalex.org/W3187071356","https://openalex.org/W3196420684","https://openalex.org/W3202656664","https://openalex.org/W4230418757","https://openalex.org/W4288058759","https://openalex.org/W4297808394","https://openalex.org/W4385245566","https://openalex.org/W6631190155","https://openalex.org/W6682839988","https://openalex.org/W6685053522","https://openalex.org/W6739901393","https://openalex.org/W6755207826","https://openalex.org/W6766673545","https://openalex.org/W6774314701","https://openalex.org/W6777837344","https://openalex.org/W6783961830","https://openalex.org/W6787655056"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Contrastive":[0],"learning":[1,16,124],"(CL)":[2],"has":[3,19,29],"been":[4],"successfully":[5],"applied":[6],"in":[7,23,76],"Natural":[8],"Language":[9],"Processing":[10],"(NLP)":[11],"as":[12,173,175],"a":[13,69,89,106],"powerful":[14],"representation":[15,62,154],"method":[17,84,184],"and":[18,53,60,151],"shown":[20],"promising":[21],"results":[22],"various":[24],"downstream":[25],"tasks.":[26],"Recent":[27],"research":[28],"highlighted":[30],"the":[31,86,98,103,117,158,176,189],"importance":[32],"of":[33,88,105,160],"constructing":[34,80],"effective":[35],"contrastive":[36,81,123],"samples":[37,59,96,115],"through":[38],"data":[39,43,74],"augmentation.":[40],"However,":[41],"current":[42],"augmentation":[44],"methods":[45],"primarily":[46],"rely":[47],"on":[48,166],"random":[49],"word":[50],"deletion,":[51],"substitution,":[52],"cropping,":[54],"which":[55,136],"may":[56],"introduce":[57],"noisy":[58],"hinder":[61],"learning.":[63,155],"In":[64],"this":[65],"article,":[66],"we":[67,128,163],"propose":[68],"novel":[70],"approach":[71,146],"to":[72,92,111,125,131],"address":[73],"imbalance":[75],"text":[77,134,139],"classification":[78,140],"by":[79],"samples.":[82],"Our":[83,145],"involves":[85],"use":[87],"Label-indicative":[90],"Component":[91],"generate":[93],"high-quality":[94],"positive":[95],"for":[97,182],"minority":[99],"class,":[100],"along":[101],"with":[102,142],"introduction":[104],"Hard":[107],"Negative":[108],"Mixing":[109],"strategy":[110],"synthesize":[112],"challenging":[113],"negative":[114],"at":[116,188],"feature":[118],"level.":[119],"By":[120],"applying":[121],"supervised":[122],"these":[126],"samples,":[127],"are":[129],"able":[130],"obtain":[132],"superior":[133],"representations,":[135],"significantly":[137],"benefit":[138],"tasks":[141],"imbalanced":[143,177],"data.":[144],"effectively":[147],"mitigates":[148],"distributional":[149],"biases":[150],"promotes":[152],"noise-resistant":[153],"To":[156],"validate":[157],"effectiveness":[159],"our":[161,183],"method,":[162],"conducted":[164],"experiments":[165],"benchmark":[167],"datasets":[168],"(THUCNews,":[169],"AG\u2019s":[170],"News,":[171],"20NG)":[172],"well":[174],"FDCNews":[178],"dataset.":[179],"The":[180],"code":[181],"is":[185],"publicly":[186],"available":[187],"following":[190],"GitHub":[191],"repository:":[192],"https://github.com/hanggun/CLDMTC.":[193]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":4}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
