{"id":"https://openalex.org/W2146902533","doi":"https://doi.org/10.1109/mis.2004.1274912","title":"Advanced data preprocessing for intersites Web usage mining","display_name":"Advanced data preprocessing for intersites Web usage mining","publication_year":2004,"publication_date":"2004-03-01","ids":{"openalex":"https://openalex.org/W2146902533","doi":"https://doi.org/10.1109/mis.2004.1274912","mag":"2146902533"},"language":"en","primary_location":{"id":"doi:10.1109/mis.2004.1274912","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mis.2004.1274912","pdf_url":null,"source":{"id":"https://openalex.org/S114241109","display_name":"IEEE Intelligent Systems","issn_l":"1541-1672","issn":["1541-1672","1941-1294"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Intelligent Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114055714","display_name":"D. Tanasa","orcid":null},"institutions":[{"id":"https://openalex.org/I1326498283","display_name":"Institut national de recherche en sciences et technologies du num\u00e9rique","ror":"https://ror.org/02kvxyf05","country_code":"FR","type":"government","lineage":["https://openalex.org/I1326498283"]},{"id":"https://openalex.org/I4210150884","display_name":"Fondation Sophia Antipolis","ror":"https://ror.org/05nkcfv27","country_code":"FR","type":"other","lineage":["https://openalex.org/I4210150884"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"D. Tanasa","raw_affiliation_strings":["AxIS Project Team, I.N.R.I.A. Sophia Antipolis, Sophia-Antipolis, France","AxIS Project-Team, INRIA, Sophia Antipolis, France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"AxIS Project Team, I.N.R.I.A. Sophia Antipolis, Sophia-Antipolis, France","institution_ids":["https://openalex.org/I4210150884","https://openalex.org/I1326498283"]},{"raw_affiliation_string":"AxIS Project-Team, INRIA, Sophia Antipolis, France","institution_ids":["https://openalex.org/I1326498283"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038801565","display_name":"Brigitte Trousse","orcid":null},"institutions":[{"id":"https://openalex.org/I1326498283","display_name":"Institut national de recherche en sciences et technologies du num\u00e9rique","ror":"https://ror.org/02kvxyf05","country_code":"FR","type":"government","lineage":["https://openalex.org/I1326498283"]},{"id":"https://openalex.org/I4210150884","display_name":"Fondation Sophia Antipolis","ror":"https://ror.org/05nkcfv27","country_code":"FR","type":"other","lineage":["https://openalex.org/I4210150884"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"B. Trousse","raw_affiliation_strings":["AxIS Project Team, I.N.R.I.A. Sophia Antipolis, Sophia-Antipolis, France","AxIS Project-Team, INRIA, Sophia Antipolis, France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"AxIS Project Team, I.N.R.I.A. Sophia Antipolis, Sophia-Antipolis, France","institution_ids":["https://openalex.org/I4210150884","https://openalex.org/I1326498283"]},{"raw_affiliation_string":"AxIS Project-Team, INRIA, Sophia Antipolis, France","institution_ids":["https://openalex.org/I1326498283"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":25.0378,"has_fulltext":false,"cited_by_count":169,"citation_normalized_percentile":{"value":0.99324485,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"19","issue":"2","first_page":"59","last_page":"65"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.989300012588501,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.987500011920929,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8523191809654236},{"id":"https://openalex.org/keywords/web-mining","display_name":"Web mining","score":0.70525062084198},{"id":"https://openalex.org/keywords/automatic-summarization","display_name":"Automatic summarization","score":0.6779255867004395},{"id":"https://openalex.org/keywords/web-log-analysis-software","display_name":"Web log analysis software","score":0.6548134684562683},{"id":"https://openalex.org/keywords/data-pre-processing","display_name":"Data pre-processing","score":0.5980357527732849},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.5739372968673706},{"id":"https://openalex.org/keywords/web-server","display_name":"Web server","score":0.5193696618080139},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4904331862926483},{"id":"https://openalex.org/keywords/data-web","display_name":"Data Web","score":0.48644065856933594},{"id":"https://openalex.org/keywords/server","display_name":"Server","score":0.43975839018821716},{"id":"https://openalex.org/keywords/knowledge-extraction","display_name":"Knowledge extraction","score":0.42477571964263916},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4134410619735718},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.40405017137527466},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.3551943302154541},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.32004690170288086},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.2042865753173828},{"id":"https://openalex.org/keywords/web-api","display_name":"Web API","score":0.18458759784698486},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.11358392238616943}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8523191809654236},{"id":"https://openalex.org/C197046077","wikidata":"https://www.wikidata.org/wiki/Q785337","display_name":"Web mining","level":3,"score":0.70525062084198},{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.6779255867004395},{"id":"https://openalex.org/C104352257","wikidata":"https://www.wikidata.org/wiki/Q1238961","display_name":"Web log analysis software","level":5,"score":0.6548134684562683},{"id":"https://openalex.org/C10551718","wikidata":"https://www.wikidata.org/wiki/Q5227332","display_name":"Data pre-processing","level":2,"score":0.5980357527732849},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.5739372968673706},{"id":"https://openalex.org/C11392498","wikidata":"https://www.wikidata.org/wiki/Q11288","display_name":"Web server","level":3,"score":0.5193696618080139},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4904331862926483},{"id":"https://openalex.org/C162005631","wikidata":"https://www.wikidata.org/wiki/Q54837","display_name":"Data Web","level":3,"score":0.48644065856933594},{"id":"https://openalex.org/C93996380","wikidata":"https://www.wikidata.org/wiki/Q44127","display_name":"Server","level":2,"score":0.43975839018821716},{"id":"https://openalex.org/C120567893","wikidata":"https://www.wikidata.org/wiki/Q1582085","display_name":"Knowledge extraction","level":2,"score":0.42477571964263916},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4134410619735718},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.40405017137527466},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.3551943302154541},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.32004690170288086},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.2042865753173828},{"id":"https://openalex.org/C127613066","wikidata":"https://www.wikidata.org/wiki/Q557770","display_name":"Web API","level":4,"score":0.18458759784698486},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.11358392238616943}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/mis.2004.1274912","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mis.2004.1274912","pdf_url":null,"source":{"id":"https://openalex.org/S114241109","display_name":"IEEE Intelligent Systems","issn_l":"1541-1672","issn":["1541-1672","1941-1294"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Intelligent Systems","raw_type":"journal-article"},{"id":"pmh:oai:HAL:hal-00950763v1","is_oa":false,"landing_page_url":"https://inria.hal.science/hal-00950763","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.computer.org/csdl/mags/ex/2004/02/x2059-abs.html","raw_type":"Journal articles"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4399999976158142,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W1483085729","https://openalex.org/W1487646136","https://openalex.org/W1533634757","https://openalex.org/W1545976906","https://openalex.org/W1589386793","https://openalex.org/W1593088422","https://openalex.org/W1622437486","https://openalex.org/W1996869586","https://openalex.org/W2021434088","https://openalex.org/W2046166587","https://openalex.org/W2149483835","https://openalex.org/W2149636015","https://openalex.org/W2772031206","https://openalex.org/W4285719527","https://openalex.org/W6746995165"],"related_works":["https://openalex.org/W2365206651","https://openalex.org/W2184939770","https://openalex.org/W2084268810","https://openalex.org/W2339000767","https://openalex.org/W1584959763","https://openalex.org/W2187863252","https://openalex.org/W2351971554","https://openalex.org/W96919113","https://openalex.org/W2109169234","https://openalex.org/W3196114902"],"abstract_inverted_index":{"Web":[0,12,56,71,77,82,101,175],"usage":[1],"mining":[2,5],"applies":[3],"data":[4,21,38,120,125,127,130,141,147],"procedures":[6],"to":[7,46,59,86,109,155],"analyze":[8],"user":[9,61,67],"access":[10],"of":[11,51,64,145,160],"sites.":[13,176],"As":[14],"with":[15,76,170],"any":[16],"KDD":[17],"(knowledge":[18],"discovery":[19],"and":[20,32,58,115,129],"mining)":[22],"process,":[23],"WUM":[24,74,135],"contains":[25],"three":[26,123],"main":[27],"steps:":[28,124],"preprocessing,":[29,39],"knowledge":[30],"extraction,":[31],"results":[33],"analysis.":[34],"We":[35],"focus":[36],"on":[37,69],"a":[40,146],"fastidious,":[41],"complex":[42],"process.":[43],"Analysts":[44],"aim":[45],"determine":[47],"the":[48,55,70,87,94,99,112,117,153,158],"exact":[49],"list":[50],"users":[52],"who":[53],"accessed":[54],"site":[57],"reconstitute":[60,116],"sessions-the":[62],"sequence":[63],"actions":[65],"each":[66],"performed":[68],"site.":[72],"Intersites":[73],"deals":[75],"server":[78],"logs":[79],"from":[80,173],"several":[81],"sites,":[83],"generally":[84],"belonging":[85],"same":[88],"organization.":[89],"Thus,":[90],"analysts":[91],"must":[92],"reassemble":[93],"users'":[95],"path":[96],"through":[97],"all":[98,111],"different":[100],"servers":[102],"that":[103],"they":[104],"visited.":[105],"Our":[106,132],"solution":[107,133,166],"is":[108],"join":[110],"log":[113,171],"files":[114,172],"visit.":[118],"Classical":[119],"preprocessing":[121],"involves":[122],"fusion,":[126],"cleaning,":[128],"structuration.":[131],"for":[134],"adds":[136],"what":[137],"we":[138],"call":[139],"advanced":[140],"preprocessing.":[142],"This":[143],"consists":[144],"summarization":[148],"step,":[149],"which":[150],"will":[151],"allow":[152],"analyst":[154],"select":[156],"only":[157],"information":[159],"interest.":[161],"We've":[162],"successfully":[163],"tested":[164],"our":[165],"in":[167],"an":[168],"experiment":[169],"INRIA":[174]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":8},{"year":2017,"cited_by_count":6},{"year":2016,"cited_by_count":12},{"year":2015,"cited_by_count":12},{"year":2014,"cited_by_count":12},{"year":2013,"cited_by_count":12},{"year":2012,"cited_by_count":12}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
