{"id":"https://openalex.org/W2137587742","doi":"https://doi.org/10.1287/ijoc.15.2.148.14449","title":"On the Existence and Significance of Data Preprocessing Biases in Web-Usage Mining","display_name":"On the Existence and Significance of Data Preprocessing Biases in Web-Usage Mining","publication_year":2003,"publication_date":"2003-05-01","ids":{"openalex":"https://openalex.org/W2137587742","doi":"https://doi.org/10.1287/ijoc.15.2.148.14449","mag":"2137587742"},"language":"en","primary_location":{"id":"doi:10.1287/ijoc.15.2.148.14449","is_oa":false,"landing_page_url":"https://doi.org/10.1287/ijoc.15.2.148.14449","pdf_url":null,"source":{"id":"https://openalex.org/S165318533","display_name":"INFORMS journal on computing","issn_l":"1091-9856","issn":["1091-9856","1526-5528"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310315699","host_organization_name":"Institute for Operations Research and the Management Sciences","host_organization_lineage":["https://openalex.org/P4310315699"],"host_organization_lineage_names":["Institute for Operations Research and the Management Sciences"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"INFORMS Journal on Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5038347162","display_name":"Zhiqiang Zheng","orcid":"https://orcid.org/0000-0001-8483-8713"},"institutions":[{"id":"https://openalex.org/I79576946","display_name":"University of Pennsylvania","ror":"https://ror.org/00b30xv10","country_code":"US","type":"education","lineage":["https://openalex.org/I79576946"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Zhiqiang Zheng","raw_affiliation_strings":["Operations and Information Management, The Wharton School, University of Pennsylvania, 3730 Walnut Street, Philadelphia, Pennsylvania, 19104-6340, USA"],"affiliations":[{"raw_affiliation_string":"Operations and Information Management, The Wharton School, University of Pennsylvania, 3730 Walnut Street, Philadelphia, Pennsylvania, 19104-6340, USA","institution_ids":["https://openalex.org/I79576946"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050411634","display_name":"Balaji Padmanabhan","orcid":"https://orcid.org/0000-0002-3498-0778"},"institutions":[{"id":"https://openalex.org/I79576946","display_name":"University of Pennsylvania","ror":"https://ror.org/00b30xv10","country_code":"US","type":"education","lineage":["https://openalex.org/I79576946"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Balaji Padmanabhan","raw_affiliation_strings":["Operations and Information Management, The Wharton School, University of Pennsylvania, 3730 Walnut Street, Philadelphia, Pennsylvania, 19104-6340, USA"],"affiliations":[{"raw_affiliation_string":"Operations and Information Management, The Wharton School, University of Pennsylvania, 3730 Walnut Street, Philadelphia, Pennsylvania, 19104-6340, USA","institution_ids":["https://openalex.org/I79576946"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5028900472","display_name":"Steven O. Kimbrough","orcid":"https://orcid.org/0000-0001-7173-5272"},"institutions":[{"id":"https://openalex.org/I79576946","display_name":"University of Pennsylvania","ror":"https://ror.org/00b30xv10","country_code":"US","type":"education","lineage":["https://openalex.org/I79576946"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Steven O. Kimbrough","raw_affiliation_strings":["Operations and Information Management, The Wharton School, University of Pennsylvania, 3730 Walnut Street, Philadelphia, Pennsylvania, 19104-6340, USA"],"affiliations":[{"raw_affiliation_string":"Operations and Information Management, The Wharton School, University of Pennsylvania, 3730 Walnut Street, Philadelphia, Pennsylvania, 19104-6340, USA","institution_ids":["https://openalex.org/I79576946"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5038347162"],"corresponding_institution_ids":["https://openalex.org/I79576946"],"apc_list":null,"apc_paid":null,"fwci":6.3915,"has_fulltext":false,"cited_by_count":24,"citation_normalized_percentile":{"value":0.96408239,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"15","issue":"2","first_page":"148","last_page":"170"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12384","display_name":"Customer churn and segmentation","score":0.9943000078201294,"subfield":{"id":"https://openalex.org/subfields/1406","display_name":"Marketing"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.8077578544616699},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7318781018257141},{"id":"https://openalex.org/keywords/session","display_name":"Session (web analytics)","score":0.7117599248886108},{"id":"https://openalex.org/keywords/data-pre-processing","display_name":"Data pre-processing","score":0.6914076209068298},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.6201997995376587},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6178860068321228},{"id":"https://openalex.org/keywords/web-mining","display_name":"Web mining","score":0.4807819724082947},{"id":"https://openalex.org/keywords/logistic-regression","display_name":"Logistic regression","score":0.42984092235565186},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.34421437978744507},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.33228546380996704},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.1605277955532074},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.10341256856918335}],"concepts":[{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.8077578544616699},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7318781018257141},{"id":"https://openalex.org/C2779182362","wikidata":"https://www.wikidata.org/wiki/Q17126187","display_name":"Session (web analytics)","level":2,"score":0.7117599248886108},{"id":"https://openalex.org/C10551718","wikidata":"https://www.wikidata.org/wiki/Q5227332","display_name":"Data pre-processing","level":2,"score":0.6914076209068298},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.6201997995376587},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6178860068321228},{"id":"https://openalex.org/C197046077","wikidata":"https://www.wikidata.org/wiki/Q785337","display_name":"Web mining","level":3,"score":0.4807819724082947},{"id":"https://openalex.org/C151956035","wikidata":"https://www.wikidata.org/wiki/Q1132755","display_name":"Logistic regression","level":2,"score":0.42984092235565186},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.34421437978744507},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33228546380996704},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.1605277955532074},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.10341256856918335},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1287/ijoc.15.2.148.14449","is_oa":false,"landing_page_url":"https://doi.org/10.1287/ijoc.15.2.148.14449","pdf_url":null,"source":{"id":"https://openalex.org/S165318533","display_name":"INFORMS journal on computing","issn_l":"1091-9856","issn":["1091-9856","1526-5528"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310315699","host_organization_name":"Institute for Operations Research and the Management Sciences","host_organization_lineage":["https://openalex.org/P4310315699"],"host_organization_lineage_names":["Institute for Operations Research and the Management Sciences"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"INFORMS Journal on Computing","raw_type":"journal-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.87.5368","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.87.5368","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://academic.missouriwestern.edu/mlewis14/LOP/LOP references/Data preprocessing bia in WUM.pdf","raw_type":"text"},{"id":"pmh:oai:RePEc:inm:orijoc:v:15:y:2003:i:2:p:148-170","is_oa":false,"landing_page_url":"http://doi.org/10.1287/ijoc.15.2.148.14449","pdf_url":null,"source":{"id":"https://openalex.org/S4306401271","display_name":"RePEc: Research Papers in Economics","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I77793887","host_organization_name":"Federal Reserve Bank of St. Louis","host_organization_lineage":["https://openalex.org/I77793887"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W142793689","https://openalex.org/W162533270","https://openalex.org/W203695064","https://openalex.org/W1485828998","https://openalex.org/W1523293200","https://openalex.org/W1525293755","https://openalex.org/W1569279788","https://openalex.org/W1593088422","https://openalex.org/W1605275907","https://openalex.org/W1657646951","https://openalex.org/W1912684809","https://openalex.org/W1989993068","https://openalex.org/W1996869586","https://openalex.org/W1998240134","https://openalex.org/W2011338930","https://openalex.org/W2017653675","https://openalex.org/W2024395071","https://openalex.org/W2037295150","https://openalex.org/W2047253786","https://openalex.org/W2054261883","https://openalex.org/W2063771604","https://openalex.org/W2074047175","https://openalex.org/W2080950999","https://openalex.org/W2096452841","https://openalex.org/W2103463156","https://openalex.org/W2107281124","https://openalex.org/W2112440119","https://openalex.org/W2118160636","https://openalex.org/W2134402836","https://openalex.org/W2140937627","https://openalex.org/W2141394518","https://openalex.org/W2578402870","https://openalex.org/W2987772435","https://openalex.org/W3003844013","https://openalex.org/W3125369468","https://openalex.org/W3147389599","https://openalex.org/W4212848460","https://openalex.org/W4251794855","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W2383487638","https://openalex.org/W2384308529","https://openalex.org/W2363963840","https://openalex.org/W2376765408","https://openalex.org/W2373749036","https://openalex.org/W2351383156","https://openalex.org/W2889453578","https://openalex.org/W2546608234","https://openalex.org/W2351971554","https://openalex.org/W2373709144"],"abstract_inverted_index":{"The":[0,82],"literature":[1],"on":[2,42,75],"web-usage":[3],"mining":[4],"is":[5],"replete":[6],"with":[7],"data":[8,21,47,76,100],"preprocessing":[9,22,101],"techniques,":[10],"which":[11,107],"correspond":[12],"to":[13],"many":[14],"closely":[15],"related":[16],"problem":[17],"formulations.":[18],"We":[19],"survey":[20],"techniques":[23,33],"for":[24],"session-level":[25,39],"pattern":[26],"discovery":[27],"and":[28,69,94],"compare":[29],"three":[30,79,87],"of":[31,37,57,106],"these":[32],"in":[34,90],"the":[35,43,86,104],"context":[36],"understanding":[38],"purchase":[40],"behavior":[41,53],"web.":[44],"Using":[45],"real":[46],"collected":[48],"from":[49],"20,000":[50],"users'":[51],"browsing":[52],"over":[54],"a":[55,99],"period":[56],"six":[58],"months,":[59],"four":[60],"different":[61,80,92],"models":[62],"(linear":[63],"regressions,":[64,66],"logistic":[65],"neural":[67],"networks,":[68],"classification":[70],"trees)":[71],"are":[72],"built":[73],"based":[74],"preprocessed":[77],"using":[78],"techniques.":[81],"results":[83],"demonstrate":[84],"that":[85,98],"approaches":[88],"result":[89],"radically":[91],"conclusions":[93],"provide":[95],"initial":[96],"evidence":[97],"bias":[102],"exists,":[103],"effect":[105],"can":[108],"be":[109],"significant.":[110]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":1},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":1},{"year":2012,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
