{"id":"https://openalex.org/W2767958491","doi":"https://doi.org/10.1145/3132847.3132940","title":"A Two-step Information Accumulation Strategy for Learning from Highly Imbalanced Data","display_name":"A Two-step Information Accumulation Strategy for Learning from Highly Imbalanced Data","publication_year":2017,"publication_date":"2017-11-06","ids":{"openalex":"https://openalex.org/W2767958491","doi":"https://doi.org/10.1145/3132847.3132940","mag":"2767958491"},"language":"en","primary_location":{"id":"doi:10.1145/3132847.3132940","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3132847.3132940","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2017 ACM on Conference on Information and Knowledge Management","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100395534","display_name":"Bin Liu","orcid":"https://orcid.org/0000-0002-8588-8744"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Bin Liu","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100402996","display_name":"Min Zhang","orcid":"https://orcid.org/0000-0003-3158-1920"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Min Zhang","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101524043","display_name":"Weizhi Ma","orcid":"https://orcid.org/0000-0001-5604-7527"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weizhi Ma","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100658188","display_name":"Xin Li","orcid":"https://orcid.org/0000-0002-9244-6485"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xin Li","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100668121","display_name":"Yiqun Liu","orcid":"https://orcid.org/0000-0002-0140-4512"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yiqun Liu","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100760812","display_name":"Shaoping Ma","orcid":"https://orcid.org/0000-0002-8762-8268"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shaoping Ma","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100395534"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.39,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.71485303,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1289","last_page":"1298"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.989799976348877,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.7687845826148987},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7606334686279297},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6036847233772278},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5952242016792297},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.557317852973938},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.43208515644073486},{"id":"https://openalex.org/keywords/labeled-data","display_name":"Labeled data","score":0.41527533531188965},{"id":"https://openalex.org/keywords/zoom","display_name":"Zoom","score":0.412285178899765},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.34339162707328796},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.06784135103225708}],"concepts":[{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.7687845826148987},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7606334686279297},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6036847233772278},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5952242016792297},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.557317852973938},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.43208515644073486},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.41527533531188965},{"id":"https://openalex.org/C124913957","wikidata":"https://www.wikidata.org/wiki/Q1232548","display_name":"Zoom","level":3,"score":0.412285178899765},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.34339162707328796},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.06784135103225708},{"id":"https://openalex.org/C78762247","wikidata":"https://www.wikidata.org/wiki/Q1273174","display_name":"Petroleum engineering","level":1,"score":0.0},{"id":"https://openalex.org/C15336307","wikidata":"https://www.wikidata.org/wiki/Q1766051","display_name":"Lens (geology)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3132847.3132940","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3132847.3132940","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2017 ACM on Conference on Information and Knowledge Management","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.75,"display_name":"Reduced inequalities"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W5236451","https://openalex.org/W167016754","https://openalex.org/W1563938718","https://openalex.org/W1588282782","https://openalex.org/W1968817125","https://openalex.org/W1976035027","https://openalex.org/W1983320747","https://openalex.org/W2019858993","https://openalex.org/W2044224802","https://openalex.org/W2053724458","https://openalex.org/W2080021732","https://openalex.org/W2088641051","https://openalex.org/W2096945460","https://openalex.org/W2098370488","https://openalex.org/W2101234009","https://openalex.org/W2104167780","https://openalex.org/W2106479238","https://openalex.org/W2111316763","https://openalex.org/W2121222025","https://openalex.org/W2128678390","https://openalex.org/W2147148726","https://openalex.org/W2148143831","https://openalex.org/W2153635508","https://openalex.org/W2160150610","https://openalex.org/W2235812087","https://openalex.org/W2295598076","https://openalex.org/W2342049278","https://openalex.org/W2508831602","https://openalex.org/W2573900599","https://openalex.org/W2585770658","https://openalex.org/W2593875029","https://openalex.org/W2614183994","https://openalex.org/W2903158431","https://openalex.org/W2949541494","https://openalex.org/W2963857705","https://openalex.org/W3102476541","https://openalex.org/W4238811432","https://openalex.org/W4256361765","https://openalex.org/W6675354045"],"related_works":["https://openalex.org/W1487808658","https://openalex.org/W3172695526","https://openalex.org/W1757117718","https://openalex.org/W2889166412","https://openalex.org/W3204418343","https://openalex.org/W4292388283","https://openalex.org/W2981877337","https://openalex.org/W3203938600","https://openalex.org/W1560624709","https://openalex.org/W2169074127"],"abstract_inverted_index":{"Highly":[0],"imbalanced":[1,107,181],"data":[2,55,76,85,111,121,182,205],"is":[3,11,26,30,43,47,122,152],"common":[4],"in":[5,50,56,92],"the":[6,28,31,36,39,51,73,78,93,138,147,162,168,190,199,207],"real":[7,108],"world":[8],"and":[9,59,81,90,164,206],"it":[10],"important":[12],"but":[13,34,195],"difficult":[14],"to":[15,158,188],"train":[16],"an":[17],"effective":[18],"classifier.":[19],"In":[20],"this":[21],"paper,":[22],"Our":[23],"major":[24],"point":[25],"that":[27,44,132,183],"imbalance":[29],"observed":[32,136],"phenomenon":[33],"not":[35,186],"cause":[37],"of":[38,54,95,120,140,150,167,210],"problem.":[40],"The":[41,124],"challenge":[42],"useful":[45],"information":[46,202],"been":[48],"overshadowed":[49],"large":[52],"scale":[53],"both":[57],"majority":[58],"minority":[60],"classes.":[61],"We":[62],"propose":[63],"a":[64,174],"novel":[65],"two-step":[66],"strategy,":[67],"Information":[68,144],"Accumulation,":[69],"which":[70,160],"first":[71],"selects":[72],"most":[74,200],"discriminative":[75,201],"by":[77,86,154],"Zooming-in":[79],"phase,":[80],"then":[82],"leverages":[83],"unlabeled":[84,211],"pseudo":[87],"active":[88],"learning":[89,208],"self-training":[91],"phase":[94],"Learning":[96],"from":[97,156,203],"Learned":[98],"Results.":[99],"Comparative":[100],"experiments":[101],"are":[102,135],"conducted":[103],"on":[104,112,126,137,177,197],"large-scale":[105],"highly":[106,180],"customer":[109],"service":[110],"complaint":[113],"detection":[114],"task":[115],"(where":[116],"less":[117],"than":[118],"2%":[119],"positive).":[123],"results":[125,209],"eight":[127],"state-of-the-art":[128],"classification":[129],"algorithms":[130,142],"show":[131],"significant":[133],"improvements":[134],"performances":[139],"all":[141],"with":[143,179],"Accumulation(for":[145],"example,":[146],"F-Measure":[148],"score":[149],"Xgboost":[151],"increased":[153],"197%":[155],"0.115":[157],"0.347),":[159],"demonstrates":[161],"effectiveness":[163],"general":[165],"applicability":[166],"proposed":[169],"strategy.":[170],"This":[171],"work":[172],"explores":[173],"new":[175],"idea":[176],"dealing":[178],"we":[184],"do":[185],"aim":[187],"balance":[189],"training":[191],"examples":[192],"as":[193],"usual,":[194],"focus":[196],"finding":[198],"labeled":[204],"data.":[212]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2019,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
