{"id":"https://openalex.org/W2152941520","doi":"https://doi.org/10.1145/1401890.1401907","title":"Partitioned logistic regression for spam filtering","display_name":"Partitioned logistic regression for spam filtering","publication_year":2008,"publication_date":"2008-08-24","ids":{"openalex":"https://openalex.org/W2152941520","doi":"https://doi.org/10.1145/1401890.1401907","mag":"2152941520"},"language":"en","primary_location":{"id":"doi:10.1145/1401890.1401907","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1401890.1401907","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 14th ACM SIGKDD international conference on Knowledge discovery and data mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5076904467","display_name":"Ming\u2010Wei Chang","orcid":"https://orcid.org/0000-0002-0137-8895"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Ming-wei Chang","raw_affiliation_strings":["University of Illinois Urbana Champaign, Urbana, IL, USA"],"affiliations":[{"raw_affiliation_string":"University of Illinois Urbana Champaign, Urbana, IL, USA","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066873932","display_name":"Wen-tau Yih","orcid":null},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wen-tau Yih","raw_affiliation_strings":["Microsoft Research, Redmond, WA, USA"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102772073","display_name":"Christopher Meek","orcid":"https://orcid.org/0000-0003-1696-6152"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Christopher Meek","raw_affiliation_strings":["Microsoft Research, Redmond, WA, USA"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5076904467"],"corresponding_institution_ids":["https://openalex.org/I157725225"],"apc_list":null,"apc_paid":null,"fwci":12.2453,"has_fulltext":false,"cited_by_count":46,"citation_normalized_percentile":{"value":0.98306311,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"97","last_page":"105"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10400","display_name":"Network Security and Intrusion Detection","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/logistic-regression","display_name":"Logistic regression","score":0.8387112617492676},{"id":"https://openalex.org/keywords/naive-bayes-classifier","display_name":"Naive Bayes classifier","score":0.8349388241767883},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.7082697153091431},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6478896141052246},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6348533034324646},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5639125108718872},{"id":"https://openalex.org/keywords/bayes-theorem","display_name":"Bayes' theorem","score":0.5307908654212952},{"id":"https://openalex.org/keywords/logistic-model-tree","display_name":"Logistic model tree","score":0.5134945511817932},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4515399932861328},{"id":"https://openalex.org/keywords/bayes-error-rate","display_name":"Bayes error rate","score":0.4250318109989166},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.4037835896015167},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3670433759689331},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.3278070092201233},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.3225228786468506},{"id":"https://openalex.org/keywords/bayesian-probability","display_name":"Bayesian probability","score":0.27018752694129944},{"id":"https://openalex.org/keywords/bayes-classifier","display_name":"Bayes classifier","score":0.1924634575843811}],"concepts":[{"id":"https://openalex.org/C151956035","wikidata":"https://www.wikidata.org/wiki/Q1132755","display_name":"Logistic regression","level":2,"score":0.8387112617492676},{"id":"https://openalex.org/C52001869","wikidata":"https://www.wikidata.org/wiki/Q812530","display_name":"Naive Bayes classifier","level":3,"score":0.8349388241767883},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.7082697153091431},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6478896141052246},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6348533034324646},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5639125108718872},{"id":"https://openalex.org/C207201462","wikidata":"https://www.wikidata.org/wiki/Q182505","display_name":"Bayes' theorem","level":3,"score":0.5307908654212952},{"id":"https://openalex.org/C61722155","wikidata":"https://www.wikidata.org/wiki/Q6667643","display_name":"Logistic model tree","level":3,"score":0.5134945511817932},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4515399932861328},{"id":"https://openalex.org/C143809311","wikidata":"https://www.wikidata.org/wiki/Q4874458","display_name":"Bayes error rate","level":5,"score":0.4250318109989166},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.4037835896015167},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3670433759689331},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.3278070092201233},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3225228786468506},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.27018752694129944},{"id":"https://openalex.org/C185207860","wikidata":"https://www.wikidata.org/wiki/Q17004744","display_name":"Bayes classifier","level":4,"score":0.1924634575843811},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/1401890.1401907","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1401890.1401907","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 14th ACM SIGKDD international conference on Knowledge discovery and data mining","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.186.9110","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.186.9110","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://research.microsoft.com/pubs/73709/ChangYihMeek%20-%20KDD-08.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7599999904632568,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":49,"referenced_works":["https://openalex.org/W3805906","https://openalex.org/W6316171","https://openalex.org/W154296565","https://openalex.org/W176125184","https://openalex.org/W202303397","https://openalex.org/W1534477342","https://openalex.org/W1592138725","https://openalex.org/W1648885110","https://openalex.org/W2014959658","https://openalex.org/W2041597619","https://openalex.org/W2063787211","https://openalex.org/W2079182758","https://openalex.org/W2085302848","https://openalex.org/W2089086886","https://openalex.org/W2102294561","https://openalex.org/W2105895401","https://openalex.org/W2140336868","https://openalex.org/W2140785063","https://openalex.org/W2142340904","https://openalex.org/W2143537651","https://openalex.org/W2147785294","https://openalex.org/W2147891303","https://openalex.org/W2148603752","https://openalex.org/W2150973844","https://openalex.org/W2156909104","https://openalex.org/W2158275940","https://openalex.org/W2160536005","https://openalex.org/W2161017011","https://openalex.org/W2163614729","https://openalex.org/W2167216307","https://openalex.org/W2167277498","https://openalex.org/W2169075655","https://openalex.org/W2169384781","https://openalex.org/W2171622762","https://openalex.org/W2171849160","https://openalex.org/W2296452361","https://openalex.org/W2916045930","https://openalex.org/W2917688635","https://openalex.org/W2978329087","https://openalex.org/W4214529620","https://openalex.org/W6600171677","https://openalex.org/W6606284879","https://openalex.org/W6607217061","https://openalex.org/W6608351394","https://openalex.org/W6636944375","https://openalex.org/W6675580205","https://openalex.org/W6681093246","https://openalex.org/W6684907601","https://openalex.org/W7071374342"],"related_works":["https://openalex.org/W2374047926","https://openalex.org/W179179905","https://openalex.org/W2394466068","https://openalex.org/W2360982908","https://openalex.org/W2086147528","https://openalex.org/W2393473353","https://openalex.org/W1973600295","https://openalex.org/W145653800","https://openalex.org/W4312309445","https://openalex.org/W2537862391"],"abstract_inverted_index":{"Naive":[0],"Bayes":[1,48,69,105,153],"and":[2,24,41,70,97,122,148,154],"logistic":[3,60,71,95,155],"regression":[4,96],"perform":[5],"well":[6,26],"in":[7,28,129],"different":[8],"regimes.":[9],"While":[10],"the":[11,76,103,138,159],"former":[12],"is":[13,20,32,118],"a":[14,33,55,109,130],"very":[15],"simple":[16],"generative":[17],"model":[18,35,74,117],"which":[19,36,62],"efficient":[21],"to":[22,45,107,151],"train":[23],"performs":[25],"empirically":[27],"many":[29],"applications,the":[30],"latter":[31],"discriminative":[34],"often":[37],"achieves":[38],"better":[39,119],"accuracy":[40],"can":[42],"be":[43],"shown":[44],"outperform":[46],"naive":[47,68,104,152],"asymptotically.":[49],"In":[50,124],"this":[51],"paper,":[52],"we":[53],"propose":[54],"novel":[56],"hybrid":[57],"model,":[58],"partitioned":[59],"regression,":[61,156],"has":[63],"several":[64,81],"advantages":[65],"over":[66],"both":[67,120],"regression.":[72],"This":[73],"separates":[75],"original":[77],"feature":[78,83],"space":[79],"into":[80],"disjoint":[82],"groups.":[84],"Individual":[85],"models":[86],"on":[87],"these":[88],"groups":[89],"of":[90],"features":[91],"are":[92,100],"learned":[93],"using":[94,102,158],"their":[98],"predictions":[99],"combined":[101],"principle":[106],"produce":[108],"robust":[110],"final":[111],"estimation.":[112],"We":[113],"show":[114],"that":[115],"our":[116],"theoretically":[121],"empirically.":[123],"addition,":[125],"when":[126,157],"applying":[127],"it":[128,136],"practical":[131],"application,":[132],"email":[133],"spam":[134],"filtering,":[135],"improves":[137],"normalized":[139],"AUC":[140],"score":[141],"at":[142],"10%":[143],"false-positive":[144],"rate":[145],"by":[146],"28.8%":[147],"23.6%":[149],"compared":[150],"exact":[160],"same":[161],"training":[162],"examples.":[163]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":5},{"year":2014,"cited_by_count":3},{"year":2013,"cited_by_count":8},{"year":2012,"cited_by_count":4}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
