{"id":"https://openalex.org/W2245583245","doi":"https://doi.org/10.1109/icdm.2015.53","title":"The Convergence Behavior of Naive Bayes on Large Sparse Datasets","display_name":"The Convergence Behavior of Naive Bayes on Large Sparse Datasets","publication_year":2015,"publication_date":"2015-11-01","ids":{"openalex":"https://openalex.org/W2245583245","doi":"https://doi.org/10.1109/icdm.2015.53","mag":"2245583245"},"language":"en","primary_location":{"id":"doi:10.1109/icdm.2015.53","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icdm.2015.53","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE International Conference on Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100423209","display_name":"Xiang Li","orcid":"https://orcid.org/0000-0001-9595-8526"},"institutions":[{"id":"https://openalex.org/I125749732","display_name":"Western University","ror":"https://ror.org/02grkyz14","country_code":"CA","type":"education","lineage":["https://openalex.org/I125749732"]},{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CA","CN"],"is_corresponding":true,"raw_author_name":"Xiang Li","raw_affiliation_strings":["Computer Science Department, University of Western Ontario, Canada","School of Computer, National University of Defense Technology, China"],"affiliations":[{"raw_affiliation_string":"Computer Science Department, University of Western Ontario, Canada","institution_ids":["https://openalex.org/I125749732"]},{"raw_affiliation_string":"School of Computer, National University of Defense Technology, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027158588","display_name":"Charles X. Ling","orcid":"https://orcid.org/0000-0003-3797-1348"},"institutions":[{"id":"https://openalex.org/I125749732","display_name":"Western University","ror":"https://ror.org/02grkyz14","country_code":"CA","type":"education","lineage":["https://openalex.org/I125749732"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Charles X. Ling","raw_affiliation_strings":["Computer Science Department, University of Western Ontario, Canada"],"affiliations":[{"raw_affiliation_string":"Computer Science Department, University of Western Ontario, Canada","institution_ids":["https://openalex.org/I125749732"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101522100","display_name":"Huaimin Wang","orcid":"https://orcid.org/0000-0002-3245-1901"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huaimin Wang","raw_affiliation_strings":["School of Computer, National University of Defense Technology, China"],"affiliations":[{"raw_affiliation_string":"School of Computer, National University of Defense Technology, China","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100423209"],"corresponding_institution_ids":["https://openalex.org/I125749732","https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":0.8629,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.83005054,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"853","last_page":"858"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11303","display_name":"Bayesian Modeling and Causal Inference","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11303","display_name":"Bayesian Modeling and Causal Inference","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10057","display_name":"Face and Expression Recognition","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/missing-data","display_name":"Missing data","score":0.7715035676956177},{"id":"https://openalex.org/keywords/naive-bayes-classifier","display_name":"Naive Bayes classifier","score":0.6790190935134888},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6653776168823242},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.5983380675315857},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5901322960853577},{"id":"https://openalex.org/keywords/bayes-theorem","display_name":"Bayes' theorem","score":0.5016908645629883},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.49611696600914},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.45177146792411804},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4253702461719513},{"id":"https://openalex.org/keywords/bayesian-probability","display_name":"Bayesian probability","score":0.26184508204460144},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.16505080461502075}],"concepts":[{"id":"https://openalex.org/C9357733","wikidata":"https://www.wikidata.org/wiki/Q6878417","display_name":"Missing data","level":2,"score":0.7715035676956177},{"id":"https://openalex.org/C52001869","wikidata":"https://www.wikidata.org/wiki/Q812530","display_name":"Naive Bayes classifier","level":3,"score":0.6790190935134888},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6653776168823242},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.5983380675315857},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5901322960853577},{"id":"https://openalex.org/C207201462","wikidata":"https://www.wikidata.org/wiki/Q182505","display_name":"Bayes' theorem","level":3,"score":0.5016908645629883},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49611696600914},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.45177146792411804},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4253702461719513},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.26184508204460144},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.16505080461502075},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icdm.2015.53","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icdm.2015.53","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE International Conference on Data Mining","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320334593","display_name":"Natural Sciences and Engineering Research Council of Canada","ror":"https://ror.org/01h531d29"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W168332103","https://openalex.org/W1538057645","https://openalex.org/W1607060702","https://openalex.org/W1964869462","https://openalex.org/W2002352636","https://openalex.org/W2032026767","https://openalex.org/W2135813353","https://openalex.org/W2140785063","https://openalex.org/W2158698691","https://openalex.org/W2163614729","https://openalex.org/W2341535507","https://openalex.org/W6606875022","https://openalex.org/W6683915439","https://openalex.org/W6703949738"],"related_works":["https://openalex.org/W4380150146","https://openalex.org/W3024870410","https://openalex.org/W2410652950","https://openalex.org/W4390608645","https://openalex.org/W4283773154","https://openalex.org/W3139174110","https://openalex.org/W4289597203","https://openalex.org/W4247566972","https://openalex.org/W2537862391","https://openalex.org/W2417174640"],"abstract_inverted_index":{"Large":[0],"and":[1,30,65,75,82,115,131,137,143],"sparse":[2,83,132,167],"datasets":[3,85,100],"with":[4,35,134],"a":[5,20],"lot":[6],"of":[7,38,48,61,68,98,146],"missing":[8,96,114,126],"values":[9],"are":[10,51,101,154,162],"common":[11],"in":[12,79,169],"the":[13,36,46,66,72,94,124,139],"big":[14],"data":[15,63,95,113,125,133,168],"era.":[16],"Naive":[17],"Bayes":[18,50],"is":[19],"good":[21],"classification":[22],"algorithm":[23],"for":[24,164],"such":[25],"datasets,":[26],"as":[27],"its":[28],"time":[29],"space":[31],"complexity":[32],"scales":[33],"well":[34],"size":[37],"non-missing":[39],"values.":[40],"However,":[41],"several":[42,151],"important":[43],"questions":[44,91],"about":[45],"behavior":[47,145],"naive":[49,147],"yet":[52],"to":[53,118,128],"be":[54],"answered.":[55],"For":[56],"example,":[57],"how":[58],"different":[59],"mechanisms":[60,97,117,127],"missing,":[62],"sparseness":[64],"number":[67],"attributes":[69],"systematically":[70],"affect":[71],"learning":[73,141,165],"curves":[74],"convergence?":[76],"Recent":[77],"work":[78],"classifying":[80],"large":[81,130,166],"real-world":[84],"still":[86],"could":[87],"not":[88,102],"address":[89],"these":[90,99,120],"mainly":[92],"because":[93],"taken":[103],"into":[104],"account.":[105],"In":[106],"this":[107],"paper,":[108],"we":[109],"propose":[110],"two":[111],"novel":[112],"expansion":[116],"answer":[119],"questions.":[121],"We":[122,149],"use":[123],"generate":[129],"various":[135],"properties,":[136],"study":[138],"entire":[140],"curve":[142],"convergence":[144],"Bayes.":[148],"made":[150],"observations,":[152],"which":[153],"verified":[155],"through":[156],"detailed":[157],"theoretical":[158],"study.":[159],"Our":[160],"results":[161],"useful":[163],"practice.":[170]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":2},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
