{"id":"https://openalex.org/W1991821412","doi":"https://doi.org/10.1145/2818567.2818653","title":"Effect of Corpus Size Selection on Performance of Map-Reduce Based Distributed K-Means for Big Textual Data Clustering","display_name":"Effect of Corpus Size Selection on Performance of Map-Reduce Based Distributed K-Means for Big Textual Data Clustering","publication_year":2015,"publication_date":"2015-09-25","ids":{"openalex":"https://openalex.org/W1991821412","doi":"https://doi.org/10.1145/2818567.2818653","mag":"1991821412"},"language":"en","primary_location":{"id":"doi:10.1145/2818567.2818653","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2818567.2818653","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Sixth International Conference on Computer and Communication Technology 2015","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5059069176","display_name":"Shwet Ketu","orcid":null},"institutions":[{"id":"https://openalex.org/I26072440","display_name":"Indian Institute of Information Technology Allahabad","ror":"https://ror.org/03rgjt374","country_code":"IN","type":"education","lineage":["https://openalex.org/I26072440"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Shwet Ketu","raw_affiliation_strings":["Indian Institute of Information Technology, Allahabad, India"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Information Technology, Allahabad, India","institution_ids":["https://openalex.org/I26072440"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103111507","display_name":"Bakshi Rohit Prasad","orcid":"https://orcid.org/0000-0003-1921-9494"},"institutions":[{"id":"https://openalex.org/I26072440","display_name":"Indian Institute of Information Technology Allahabad","ror":"https://ror.org/03rgjt374","country_code":"IN","type":"education","lineage":["https://openalex.org/I26072440"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Bakshi Rohit Prasad","raw_affiliation_strings":["Indian Institute of Information Technology, Allahabad, India"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Information Technology, Allahabad, India","institution_ids":["https://openalex.org/I26072440"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5004401828","display_name":"Sonali Agarwal","orcid":"https://orcid.org/0000-0001-9083-5033"},"institutions":[{"id":"https://openalex.org/I26072440","display_name":"Indian Institute of Information Technology Allahabad","ror":"https://ror.org/03rgjt374","country_code":"IN","type":"education","lineage":["https://openalex.org/I26072440"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Sonali Agarwal","raw_affiliation_strings":["Indian Institute of Information Technology, Allahabad, India"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Information Technology, Allahabad, India","institution_ids":["https://openalex.org/I26072440"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5059069176"],"corresponding_institution_ids":["https://openalex.org/I26072440"],"apc_list":null,"apc_paid":null,"fwci":1.01794244,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.82120553,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":93,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"256","last_page":"260"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11891","display_name":"Big Data and Business Intelligence","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11891","display_name":"Big Data and Business Intelligence","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11396","display_name":"Artificial Intelligence in Healthcare","score":0.989799976348877,"subfield":{"id":"https://openalex.org/subfields/3605","display_name":"Health Information Management"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9871000051498413,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.84205162525177},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.8065453767776489},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.7417288422584534},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.7403212785720825},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7328831553459167},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.6327421069145203},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5968838930130005},{"id":"https://openalex.org/keywords/volume","display_name":"Volume (thermodynamics)","score":0.5292567610740662},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3861757516860962},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.35015204548835754},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.11017066240310669}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.84205162525177},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.8065453767776489},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.7417288422584534},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.7403212785720825},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7328831553459167},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.6327421069145203},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5968838930130005},{"id":"https://openalex.org/C20556612","wikidata":"https://www.wikidata.org/wiki/Q4469374","display_name":"Volume (thermodynamics)","level":2,"score":0.5292567610740662},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3861757516860962},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35015204548835754},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.11017066240310669},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2818567.2818653","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2818567.2818653","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Sixth International Conference on Computer and Communication Technology 2015","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W1487615702","https://openalex.org/W1505837402","https://openalex.org/W1534768706","https://openalex.org/W1967838552","https://openalex.org/W2057923756","https://openalex.org/W2063958902","https://openalex.org/W2069433961","https://openalex.org/W2094004017","https://openalex.org/W2094746278","https://openalex.org/W2104644701","https://openalex.org/W2116762767","https://openalex.org/W2125800352","https://openalex.org/W2141975087","https://openalex.org/W2163016402","https://openalex.org/W2166445532","https://openalex.org/W2182408114","https://openalex.org/W2332062499","https://openalex.org/W2933266202","https://openalex.org/W4235539094","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W2378211422","https://openalex.org/W4390608645","https://openalex.org/W4321353415","https://openalex.org/W4247566972","https://openalex.org/W2745001401","https://openalex.org/W4394895745","https://openalex.org/W2960264696","https://openalex.org/W3090563135","https://openalex.org/W2497432351","https://openalex.org/W4206777497"],"abstract_inverted_index":{"In":[0],"current":[1],"era,":[2],"we":[3],"are":[4,28],"experiencing":[5],"tremendous":[6],"growth":[7],"in":[8,43,145],"database":[9],"sizes,":[10],"types,":[11],"users,":[12],"working":[13],"environments":[14],"and":[15,30,132],"data":[16,61,68,71,98],"access":[17],"speeds.":[18],"This":[19,89],"situation":[20],"coined":[21],"a":[22,53,112],"new":[23],"term":[24],"Big":[25,45],"Data":[26,46],"which":[27,51],"large":[29,86],"complex":[31],"datasets":[32,121],"used":[33,124],"for":[34,111],"extracting":[35],"meaningful":[36],"knowledge.":[37],"One":[38],"of":[39,56,59,80,115,147],"the":[40,85,127,139],"main":[41],"challenges":[42],"processing":[44,149],"is":[47,52,92,136],"its":[48],"huge":[49,57],"volume":[50],"common":[54],"characteristic":[55],"collection":[58],"textual":[60,67],"also.":[62],"Handling":[63],"such":[64,74],"voluminous":[65],"big":[66,96],"using":[69,100],"conventional":[70],"mining":[72],"techniques":[73],"as":[75],"clustering":[76,99],"becomes":[77],"impractical":[78],"because":[79],"algorithmic":[81],"incompetence":[82],"to":[83,125],"address":[84],"computation":[87,117,133],"time.":[88,118,134,150],"research":[90],"work":[91],"mainly":[93],"focused":[94],"on":[95],"text":[97],"MapReduce":[101],"based":[102],"Distributed":[103],"K-Means":[104],"algorithm":[105],"combined":[106],"with":[107],"corpus":[108,130,140],"selection":[109,141],"technique":[110,142],"significant":[113],"decrement":[114],"overall":[116,148],"Four":[119],"benchmark":[120],"have":[122],"been":[123],"explore":[126],"relationship":[128],"between":[129],"size":[131],"It":[135],"found":[137],"that":[138],"significantly":[143],"effective":[144],"reduction":[146]},"counts_by_year":[{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":2},{"year":2016,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
