{"id":"https://openalex.org/W3013319350","doi":"https://doi.org/10.1007/s10115-020-01467-y","title":"A scalable and effective rough set theory-based approach for big data pre-processing","display_name":"A scalable and effective rough set theory-based approach for big data pre-processing","publication_year":2020,"publication_date":"2020-05-02","ids":{"openalex":"https://openalex.org/W3013319350","doi":"https://doi.org/10.1007/s10115-020-01467-y","mag":"3013319350"},"language":"en","primary_location":{"id":"doi:10.1007/s10115-020-01467-y","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10115-020-01467-y","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10115-020-01467-y.pdf","source":{"id":"https://openalex.org/S81770430","display_name":"Knowledge and Information Systems","issn_l":"0219-1377","issn":["0219-1377","0219-3116"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Knowledge and Information Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s10115-020-01467-y.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018448962","display_name":"Zaineb Chelly Dagdia","orcid":"https://orcid.org/0000-0002-2551-6586"},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I16038530","display_name":"Aberystwyth University","ror":"https://ror.org/015m2p889","country_code":"GB","type":"education","lineage":["https://openalex.org/I16038530"]},{"id":"https://openalex.org/I4210121838","display_name":"Laboratoire Lorrain de Recherche en Informatique et ses Applications","ror":"https://ror.org/02vnf0c38","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I1326498283","https://openalex.org/I277688954","https://openalex.org/I4210107720","https://openalex.org/I4210121838","https://openalex.org/I4210159245","https://openalex.org/I90183372"]},{"id":"https://openalex.org/I4210140729","display_name":"Institut Sup\u00e9rieur de Gestion de Tunis","ror":"https://ror.org/04w9mdw91","country_code":"TN","type":"education","lineage":["https://openalex.org/I108714496","https://openalex.org/I4210140729"]},{"id":"https://openalex.org/I90183372","display_name":"Universit\u00e9 de Lorraine","ror":"https://ror.org/04vfs2w97","country_code":"FR","type":"education","lineage":["https://openalex.org/I90183372"]}],"countries":["FR","GB","TN"],"is_corresponding":true,"raw_author_name":"Zaineb Chelly\u00a0Dagdia","raw_affiliation_strings":["CNRS, Inria, LORIA, Universit\u00e9 de Lorraine, 54000, Nancy, France","Department of Computer Science, Aberystwyth University, Aberystwyth, UK","LARODEC, Institut Sup\u00e9rieur de Gestion de Tunis, Tunis, Tunisia","ISG - Institut Sup\u00e9rieur de Gestion de Tunis [Tunis] (41 Avenue de la Libert\u00e9, cit\u00e9 Bouchoucha, le Bardo, 2000 - Tunisia)","Aberystwyth University (Penglais, Aberystwyth, Ceredigion, SY23 3FL - United Kingdom)","MULTISPEECH - Speech Modeling for Facilitating Oral-Based Communication (France)"],"raw_orcid":"https://orcid.org/0000-0002-2551-6586","affiliations":[{"raw_affiliation_string":"CNRS, Inria, LORIA, Universit\u00e9 de Lorraine, 54000, Nancy, France","institution_ids":["https://openalex.org/I4210121838","https://openalex.org/I90183372","https://openalex.org/I1294671590"]},{"raw_affiliation_string":"Department of Computer Science, Aberystwyth University, Aberystwyth, UK","institution_ids":["https://openalex.org/I16038530"]},{"raw_affiliation_string":"LARODEC, Institut Sup\u00e9rieur de Gestion de Tunis, Tunis, Tunisia","institution_ids":["https://openalex.org/I4210140729"]},{"raw_affiliation_string":"ISG - Institut Sup\u00e9rieur de Gestion de Tunis [Tunis] (41 Avenue de la Libert\u00e9, cit\u00e9 Bouchoucha, le Bardo, 2000 - Tunisia)","institution_ids":["https://openalex.org/I4210140729"]},{"raw_affiliation_string":"Aberystwyth University (Penglais, Aberystwyth, Ceredigion, SY23 3FL - United Kingdom)","institution_ids":["https://openalex.org/I16038530"]},{"raw_affiliation_string":"MULTISPEECH - Speech Modeling for Facilitating Oral-Based Communication (France)","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083499604","display_name":"Christine Zarges","orcid":"https://orcid.org/0000-0002-2829-4296"},"institutions":[{"id":"https://openalex.org/I16038530","display_name":"Aberystwyth University","ror":"https://ror.org/015m2p889","country_code":"GB","type":"education","lineage":["https://openalex.org/I16038530"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Christine Zarges","raw_affiliation_strings":["Department of Computer Science, Aberystwyth University, Aberystwyth, UK","Aberystwyth University (Penglais, Aberystwyth, Ceredigion, SY23 3FL - United Kingdom)"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Aberystwyth University, Aberystwyth, UK","institution_ids":["https://openalex.org/I16038530"]},{"raw_affiliation_string":"Aberystwyth University (Penglais, Aberystwyth, Ceredigion, SY23 3FL - United Kingdom)","institution_ids":["https://openalex.org/I16038530"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001516070","display_name":"Ga\u00ebl Beck","orcid":"https://orcid.org/0000-0002-5228-2666"},"institutions":[{"id":"https://openalex.org/I4210091279","display_name":"Universit\u00e9 Sorbonne Paris Nord","ror":"https://ror.org/0199hds37","country_code":"FR","type":"education","lineage":["https://openalex.org/I4210091279"]},{"id":"https://openalex.org/I4210156583","display_name":"Laboratoire d'Informatique de Paris-Nord","ror":"https://ror.org/05g1zjw44","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I4210091279","https://openalex.org/I4210156583","https://openalex.org/I4210159245"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Ga\u00ebl Beck","raw_affiliation_strings":["Computer Science Laboratory (LIPN), University Paris-North - 13, Villetaneuse, France","UP13 - Universit\u00e9 Paris 13 (France)"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Computer Science Laboratory (LIPN), University Paris-North - 13, Villetaneuse, France","institution_ids":["https://openalex.org/I4210156583"]},{"raw_affiliation_string":"UP13 - Universit\u00e9 Paris 13 (France)","institution_ids":["https://openalex.org/I4210091279"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023422312","display_name":"Mustapha Lebbah","orcid":"https://orcid.org/0000-0001-7245-6371"},"institutions":[{"id":"https://openalex.org/I4210091279","display_name":"Universit\u00e9 Sorbonne Paris Nord","ror":"https://ror.org/0199hds37","country_code":"FR","type":"education","lineage":["https://openalex.org/I4210091279"]},{"id":"https://openalex.org/I4210156583","display_name":"Laboratoire d'Informatique de Paris-Nord","ror":"https://ror.org/05g1zjw44","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I4210091279","https://openalex.org/I4210156583","https://openalex.org/I4210159245"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Mustapha Lebbah","raw_affiliation_strings":["Computer Science Laboratory (LIPN), University Paris-North - 13, Villetaneuse, France","UP13 - Universit\u00e9 Paris 13 (France)"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Computer Science Laboratory (LIPN), University Paris-North - 13, Villetaneuse, France","institution_ids":["https://openalex.org/I4210156583"]},{"raw_affiliation_string":"UP13 - Universit\u00e9 Paris 13 (France)","institution_ids":["https://openalex.org/I4210091279"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5018448962"],"corresponding_institution_ids":["https://openalex.org/I1294671590","https://openalex.org/I16038530","https://openalex.org/I4210121838","https://openalex.org/I4210140729","https://openalex.org/I90183372"],"apc_list":{"value":2290,"currency":"EUR","value_usd":2890},"apc_paid":{"value":2290,"currency":"EUR","value_usd":2890},"fwci":2.4778,"has_fulltext":true,"cited_by_count":22,"citation_normalized_percentile":{"value":0.91306956,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"62","issue":"8","first_page":"3321","last_page":"3386"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11063","display_name":"Rough Sets and Fuzzy Logic","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11063","display_name":"Rough Sets and Fuzzy Logic","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.984000027179718,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7932517528533936},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.713873028755188},{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.6526462435722351},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.6499818563461304},{"id":"https://openalex.org/keywords/rough-set","display_name":"Rough set","score":0.6117652654647827},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5952258110046387},{"id":"https://openalex.org/keywords/knowledge-extraction","display_name":"Knowledge extraction","score":0.4954010248184204},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.47675666213035583},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4691031873226166},{"id":"https://openalex.org/keywords/spark","display_name":"SPARK (programming language)","score":0.4470495879650116},{"id":"https://openalex.org/keywords/data-set","display_name":"Data set","score":0.41178038716316223},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3937464952468872},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.38926059007644653},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.09787324070930481}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7932517528533936},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.713873028755188},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.6526462435722351},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.6499818563461304},{"id":"https://openalex.org/C111012933","wikidata":"https://www.wikidata.org/wiki/Q3137210","display_name":"Rough set","level":2,"score":0.6117652654647827},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5952258110046387},{"id":"https://openalex.org/C120567893","wikidata":"https://www.wikidata.org/wiki/Q1582085","display_name":"Knowledge extraction","level":2,"score":0.4954010248184204},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.47675666213035583},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4691031873226166},{"id":"https://openalex.org/C2781215313","wikidata":"https://www.wikidata.org/wiki/Q3493345","display_name":"SPARK (programming language)","level":2,"score":0.4470495879650116},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.41178038716316223},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3937464952468872},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38926059007644653},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.09787324070930481},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1007/s10115-020-01467-y","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10115-020-01467-y","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10115-020-01467-y.pdf","source":{"id":"https://openalex.org/S81770430","display_name":"Knowledge and Information Systems","issn_l":"0219-1377","issn":["0219-1377","0219-3116"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Knowledge and Information Systems","raw_type":"journal-article"},{"id":"pmh:oai:aber.ac.uk:publications/4dc0c124-06c8-4da4-97c3-a78ac75fca92","is_oa":true,"landing_page_url":"https://pure.aber.ac.uk/portal/en/publications/a-scalable-and-effective-rough-set-theory-based-approach-for-big-data-preprocessing(4dc0c124-06c8-4da4-97c3-a78ac75fca92).html","pdf_url":"http://pure.aber.ac.uk/ws/files/37252362/ChellyDagdia2020_Article_AScalableAndEffectiveRoughSetT.pdf","source":{"id":"https://openalex.org/S4306401660","display_name":"Aberystwyth Research portal (Aberystwyth University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I16038530","host_organization_name":"Aberystwyth University","host_organization_lineage":["https://openalex.org/I16038530"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""},{"id":"pmh:oai:HAL:hal-02880626v1","is_oa":true,"landing_page_url":"https://inria.hal.science/hal-02880626","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ISSN: 0219-1377","raw_type":"info:eu-repo/semantics/article"},{"id":"pmh:oai:HAL:hal-04456307v1","is_oa":true,"landing_page_url":"https://hal.science/hal-04456307","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ISSN: 0219-1377","raw_type":"info:eu-repo/semantics/article"},{"id":"pmh:oai:aber.ac.uk:Publications/4dc0c124-06c8-4da4-97c3-a78ac75fca92","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306401660","display_name":"Aberystwyth Research portal (Aberystwyth University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I16038530","host_organization_name":"Aberystwyth University","host_organization_lineage":["https://openalex.org/I16038530"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":""}],"best_oa_location":{"id":"doi:10.1007/s10115-020-01467-y","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10115-020-01467-y","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10115-020-01467-y.pdf","source":{"id":"https://openalex.org/S81770430","display_name":"Knowledge and Information Systems","issn_l":"0219-1377","issn":["0219-1377","0219-3116"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Knowledge and Information Systems","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6712770983","display_name":"Optimised Framework based on Rough Set Theory for Big Data Pre-processing in Certain and Imprecise Contexts","funder_award_id":"702527","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G8130121852","display_name":"Optimised Framework based on Rough Set Theory for Big Data Pre-processing in Certain and Imprecise Contexts","funder_award_id":"702527","funder_id":"https://openalex.org/F4320338337","funder_display_name":"H2020 Marie Sk\u0142odowska-Curie Actions"}],"funders":[{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"},{"id":"https://openalex.org/F4320338337","display_name":"H2020 Marie Sk\u0142odowska-Curie Actions","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3013319350.pdf","grobid_xml":"https://content.openalex.org/works/W3013319350.grobid-xml"},"referenced_works_count":47,"referenced_works":["https://openalex.org/W74439177","https://openalex.org/W1485408073","https://openalex.org/W1513494614","https://openalex.org/W1547566968","https://openalex.org/W1557923305","https://openalex.org/W1593693781","https://openalex.org/W1619226191","https://openalex.org/W1673075472","https://openalex.org/W1870481460","https://openalex.org/W1952835952","https://openalex.org/W1967657094","https://openalex.org/W1977298291","https://openalex.org/W1982861695","https://openalex.org/W2027664395","https://openalex.org/W2029307344","https://openalex.org/W2040263621","https://openalex.org/W2068431618","https://openalex.org/W2074634340","https://openalex.org/W2079680557","https://openalex.org/W2102831150","https://openalex.org/W2110173188","https://openalex.org/W2127097372","https://openalex.org/W2128464104","https://openalex.org/W2133462743","https://openalex.org/W2134691826","https://openalex.org/W2143451122","https://openalex.org/W2154053567","https://openalex.org/W2154185789","https://openalex.org/W2157355837","https://openalex.org/W2157690157","https://openalex.org/W2159128662","https://openalex.org/W2164364358","https://openalex.org/W2196468142","https://openalex.org/W2237307454","https://openalex.org/W2287696922","https://openalex.org/W2316630624","https://openalex.org/W2415186390","https://openalex.org/W2545851563","https://openalex.org/W2592371352","https://openalex.org/W2600796512","https://openalex.org/W2771139966","https://openalex.org/W2783605012","https://openalex.org/W2795686572","https://openalex.org/W3120740533","https://openalex.org/W4246198815","https://openalex.org/W4249247926","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W2392963705","https://openalex.org/W2107349454","https://openalex.org/W2382278777","https://openalex.org/W1964260090","https://openalex.org/W2353240132","https://openalex.org/W2375932290","https://openalex.org/W2005378346","https://openalex.org/W2348430812","https://openalex.org/W2369293046","https://openalex.org/W2379409486"],"abstract_inverted_index":{"Abstract":[0],"A":[1,28],"big":[2,195],"challenge":[3,42],"in":[4,35,99],"the":[5,36,57,87,90,94,106,154],"knowledge":[6],"discovery":[7],"process":[8],"is":[9,128],"to":[10,38,64,85,118,165,194],"perform":[11],"data":[12,22,60,91,102,107,120,147,161],"pre-processing,":[13,148],"specifically":[14,149],"feature":[15,71,151,183],"selection,":[16,152],"on":[17],"a":[18,137,177],"large":[19],"amount":[20],"of":[21,30,46,50,96],"and":[23,92,109,139,180],"high":[24],"dimensional":[25],"attribute":[26],"set.":[27],"variety":[29],"techniques":[31,52],"have":[32,168],"been":[33,169],"proposed":[34,174],"literature":[37],"deal":[39],"with":[40,43,163],"this":[41,133],"different":[44],"degrees":[45],"success":[47],"as":[48,126],"most":[49],"these":[51,76],"need":[53,63],"further":[54],"information":[55],"about":[56],"given":[58],"input":[59,101],"for":[61,145,150],"thresholding,":[62],"specify":[65],"noise":[66],"levels":[67],"or":[68],"use":[69],"some":[70],"ranking":[72],"procedures.":[73],"To":[74],"overcome":[75],"limitations,":[77],"rough":[78,141],"set":[79,103,142],"theory":[80],"(RST)":[81],"can":[82],"be":[83],"used":[84],"discover":[86],"dependency":[88],"within":[89],"reduce":[93],"number":[95],"attributes":[97,167],"enclosed":[98],"an":[100],"while":[104],"using":[105],"alone":[108],"requiring":[110],"no":[111],"supplementary":[112],"information.":[113],"However,":[114],"when":[115],"it":[116,127,192],"comes":[117],"massive":[119],"sets,":[121],"RST":[122],"reaches":[123],"its":[124,182],"limits":[125],"highly":[129],"computationally":[130],"expensive.":[131],"In":[132,157],"paper,":[134],"we":[135],"propose":[136],"scalable":[138],"effective":[140],"theory-based":[143],"approach":[144],"large-scale":[146],"under":[153],"Spark":[155],"framework.":[156],"our":[158,173],"detailed":[159],"experiments,":[160],"sets":[162],"up":[164],"10,000":[166],"considered,":[170],"revealing":[171],"that":[172],"solution":[175],"achieves":[176],"good":[178],"speedup":[179],"performs":[181],"selection":[184],"task":[185],"well":[186],"without":[187],"sacrificing":[188],"performance.":[189],"Thus,":[190],"making":[191],"relevant":[193],"data.":[196]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":8},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":1}],"updated_date":"2026-05-23T08:51:43.019350","created_date":"2025-10-10T00:00:00"}
