{"id":"https://openalex.org/W2736789322","doi":"https://doi.org/10.1145/3102254.3102288","title":"Data miners' little helper","display_name":"Data miners' little helper","publication_year":2017,"publication_date":"2017-06-19","ids":{"openalex":"https://openalex.org/W2736789322","doi":"https://doi.org/10.1145/3102254.3102288","mag":"2736789322"},"language":"en","primary_location":{"id":"doi:10.1145/3102254.3102288","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3102254.3102288","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 7th International Conference on Web Intelligence, Mining and Semantics","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5053742461","display_name":"Tania Cerquitelli","orcid":"https://orcid.org/0000-0002-9039-6226"},"institutions":[{"id":"https://openalex.org/I177477856","display_name":"Polytechnic University of Turin","ror":"https://ror.org/00bgk9508","country_code":"IT","type":"education","lineage":["https://openalex.org/I177477856"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Tania Cerquitelli","raw_affiliation_strings":["Politecnico di Torino, Turin, Italy"],"affiliations":[{"raw_affiliation_string":"Politecnico di Torino, Turin, Italy","institution_ids":["https://openalex.org/I177477856"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018429999","display_name":"Evelina Di Corso","orcid":"https://orcid.org/0000-0002-3988-3512"},"institutions":[{"id":"https://openalex.org/I177477856","display_name":"Polytechnic University of Turin","ror":"https://ror.org/00bgk9508","country_code":"IT","type":"education","lineage":["https://openalex.org/I177477856"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Evelina Di Corso","raw_affiliation_strings":["Politecnico di Torino, Turin, Italy"],"affiliations":[{"raw_affiliation_string":"Politecnico di Torino, Turin, Italy","institution_ids":["https://openalex.org/I177477856"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081839375","display_name":"Francesco Ventura","orcid":"https://orcid.org/0000-0003-3398-8265"},"institutions":[{"id":"https://openalex.org/I177477856","display_name":"Polytechnic University of Turin","ror":"https://ror.org/00bgk9508","country_code":"IT","type":"education","lineage":["https://openalex.org/I177477856"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Francesco Ventura","raw_affiliation_strings":["Politecnico di Torino, Turin, Italy"],"affiliations":[{"raw_affiliation_string":"Politecnico di Torino, Turin, Italy","institution_ids":["https://openalex.org/I177477856"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5091589252","display_name":"Silvia Chiusano","orcid":"https://orcid.org/0000-0002-5740-5004"},"institutions":[{"id":"https://openalex.org/I177477856","display_name":"Polytechnic University of Turin","ror":"https://ror.org/00bgk9508","country_code":"IT","type":"education","lineage":["https://openalex.org/I177477856"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Silvia Chiusano","raw_affiliation_strings":["Politecnico di Torino, Turin, Italy"],"affiliations":[{"raw_affiliation_string":"Politecnico di Torino, Turin, Italy","institution_ids":["https://openalex.org/I177477856"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5053742461"],"corresponding_institution_ids":["https://openalex.org/I177477856"],"apc_list":null,"apc_paid":null,"fwci":0.7801,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.79124612,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.47218599915504456}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.47218599915504456}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3102254.3102288","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3102254.3102288","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 7th International Conference on Web Intelligence, Mining and Semantics","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W125616588","https://openalex.org/W1565377632","https://openalex.org/W1987971958","https://openalex.org/W1993731684","https://openalex.org/W1999680238","https://openalex.org/W2054648720","https://openalex.org/W2070042030","https://openalex.org/W2087208114","https://openalex.org/W2131975293","https://openalex.org/W2161562001","https://openalex.org/W2424472887","https://openalex.org/W2428804159","https://openalex.org/W2492652153","https://openalex.org/W2613406265","https://openalex.org/W2911964244","https://openalex.org/W4300826148"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2382290278","https://openalex.org/W2478288626","https://openalex.org/W4391913857","https://openalex.org/W2350741829","https://openalex.org/W2530322880"],"abstract_inverted_index":{"In":[0,27],"this":[1,38],"paper":[2],"we":[3],"propose":[4],"a":[5,34,58,109,121,185],"new":[6,39],"self-learning":[7],"engine":[8,65],"to":[9,18,47,51,102,130,171],"streamline":[10],"the":[11,28,48,53,83,99,103,126,132,136,179],"analytics":[12],"process,":[13],"as":[14],"it":[15],"enables":[16],"analysts":[17],"mine":[19],"massive":[20],"data":[21,68,76],"repositories":[22],"with":[23,175,184],"minimal":[24],"user":[25],"intervention.":[26],"context":[29],"of":[30,36,114,147,159],"cluster":[31,84],"analysis":[32],"on":[33,63,116,125,150,156],"collection":[35],"documents":[37],"system,":[40],"named":[41],"SELF-DATA":[42,61,105,148],"(SELF-learning":[43],"DAta":[44],"TrAnsformation),":[45],"suggests":[46],"analyst":[49],"how":[50],"configure":[52],"whole":[54,137],"mining":[55,138],"process":[56,139],"for":[57,135,140],"given":[59],"dataset.":[60,143],"relies":[62],"an":[64,141],"exploring":[66],"different":[67,91],"weighting":[69],"schemas":[70],"(e.g.,":[71,79,94],"normalized":[72],"term":[73],"frequencies)":[74],"and":[75,87,97,120,166,178],"transformation":[77],"methods":[78],"PCA)":[80],"before":[81],"applying":[82],"analysis,":[85],"evaluating":[86],"comparing":[88],"solutions":[89,101],"through":[90],"quality":[92],"indices":[93],"weighted":[95],"Silhouette),":[96],"presenting":[98],"k-top":[100],"analyst.":[104],"will":[106],"also":[107],"include":[108],"knowledge":[110,127],"base":[111,128],"storing":[112],"results":[113,162],"experiments":[115],"previously":[117],"processed":[118],"datasets,":[119,177],"classification":[122],"algorithm":[123],"trained":[124],"content":[129],"forecast":[131],"best":[133],"configuration":[134],"unexplored":[142],"The":[144],"first":[145],"development":[146],"running":[149],"Apache":[151],"Spark":[152],"has":[153],"been":[154],"validated":[155],"5":[157],"collections":[158],"documents.":[160],"Experimental":[161],"highlight":[163],"that":[164],"TF-IDF":[165],"logarithmic":[167],"entropy":[168],"are":[169],"effective":[170],"measure":[172],"item":[173],"relevance":[174],"sparse":[176],"LSI":[180],"method":[181],"outperforms":[182],"PCA":[183],"large":[186],"dictionary.":[187]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
