{"id":"https://openalex.org/W4296711100","doi":"https://doi.org/10.1142/s0218001422400067","title":"Distributed Clustering Approach by Apache Pyspark Based on SEER for Clinical Data","display_name":"Distributed Clustering Approach by Apache Pyspark Based on SEER for Clinical Data","publication_year":2022,"publication_date":"2022-09-21","ids":{"openalex":"https://openalex.org/W4296711100","doi":"https://doi.org/10.1142/s0218001422400067"},"language":"en","primary_location":{"id":"doi:10.1142/s0218001422400067","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0218001422400067","pdf_url":null,"source":{"id":"https://openalex.org/S41486457","display_name":"International Journal of Pattern Recognition and Artificial Intelligence","issn_l":"0218-0014","issn":["0218-0014","1793-6381"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Pattern Recognition and Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100729634","display_name":"Ramesh Babu D R","orcid":null},"institutions":[{"id":"https://openalex.org/I20497027","display_name":"Cochin University of Science and Technology","ror":"https://ror.org/00a4kqq17","country_code":"IN","type":"education","lineage":["https://openalex.org/I20497027"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"R. Ramesh","raw_affiliation_strings":["Department of Computer Applications, Cochin University of Science and Technology (CUSAT) Cochin, Kerala 682022, India"],"affiliations":[{"raw_affiliation_string":"Department of Computer Applications, Cochin University of Science and Technology (CUSAT) Cochin, Kerala 682022, India","institution_ids":["https://openalex.org/I20497027"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043238482","display_name":"M. V. Judy","orcid":null},"institutions":[{"id":"https://openalex.org/I20497027","display_name":"Cochin University of Science and Technology","ror":"https://ror.org/00a4kqq17","country_code":"IN","type":"education","lineage":["https://openalex.org/I20497027"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"M. V. Judy","raw_affiliation_strings":["Department of Computer Applications, Cochin University of Science and Technology (CUSAT) Cochin, Kerala 682022, India"],"affiliations":[{"raw_affiliation_string":"Department of Computer Applications, Cochin University of Science and Technology (CUSAT) Cochin, Kerala 682022, India","institution_ids":["https://openalex.org/I20497027"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5100729634"],"corresponding_institution_ids":["https://openalex.org/I20497027"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.17945893,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"36","issue":"16","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11819","display_name":"Data-Driven Disease Surveillance","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/2713","display_name":"Epidemiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T11819","display_name":"Data-Driven Disease Surveillance","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/2713","display_name":"Epidemiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.8261003494262695},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6930551528930664},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.6059848666191101},{"id":"https://openalex.org/keywords/cure-data-clustering-algorithm","display_name":"CURE data clustering algorithm","score":0.45989444851875305},{"id":"https://openalex.org/keywords/biclustering","display_name":"Biclustering","score":0.4334490895271301},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.34860944747924805},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.32112830877304077},{"id":"https://openalex.org/keywords/correlation-clustering","display_name":"Correlation clustering","score":0.31353533267974854}],"concepts":[{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.8261003494262695},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6930551528930664},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.6059848666191101},{"id":"https://openalex.org/C33704608","wikidata":"https://www.wikidata.org/wiki/Q5014717","display_name":"CURE data clustering algorithm","level":4,"score":0.45989444851875305},{"id":"https://openalex.org/C144817290","wikidata":"https://www.wikidata.org/wiki/Q2976575","display_name":"Biclustering","level":5,"score":0.4334490895271301},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34860944747924805},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32112830877304077},{"id":"https://openalex.org/C94641424","wikidata":"https://www.wikidata.org/wiki/Q5172845","display_name":"Correlation clustering","level":3,"score":0.31353533267974854}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1142/s0218001422400067","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0218001422400067","pdf_url":null,"source":{"id":"https://openalex.org/S41486457","display_name":"International Journal of Pattern Recognition and Artificial Intelligence","issn_l":"0218-0014","issn":["0218-0014","1793-6381"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Pattern Recognition and Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W2003710473","https://openalex.org/W2026626867","https://openalex.org/W2082851087","https://openalex.org/W2118946263","https://openalex.org/W2147978281","https://openalex.org/W2173213060","https://openalex.org/W2276299242","https://openalex.org/W2513610598","https://openalex.org/W2517856412","https://openalex.org/W2591382767","https://openalex.org/W2607073028","https://openalex.org/W3214599805","https://openalex.org/W4247303792"],"related_works":["https://openalex.org/W3022637481","https://openalex.org/W3144143113","https://openalex.org/W3120229345","https://openalex.org/W1999117613","https://openalex.org/W2954170735","https://openalex.org/W2393816671","https://openalex.org/W3039964395","https://openalex.org/W1887359504","https://openalex.org/W2040929534","https://openalex.org/W2607902515"],"abstract_inverted_index":{"Data":[0],"clustering":[1,23,40,59,85,118,137,194],"is":[2],"a":[3,146],"thoroughly":[4],"studied":[5],"data":[6,101,117,144],"mining":[7],"issue.":[8],"As":[9],"the":[10,28,55,64,89,187,197],"amount":[11],"of":[12,47,72,76,189],"information":[13],"being":[14],"analyzed":[15,98],"grows":[16],"exponentially,":[17],"there":[18],"are":[19,42],"several":[20],"problems":[21],"with":[22],"diagnostic":[24],"large":[25],"datasets":[26],"like":[27],"monitoring,":[29],"microbiology,":[30],"and":[31,50,97,112,123,162],"end":[32],"results":[33],"(SEER)":[34],"carcinoma":[35],"feature":[36],"sets.":[37],"These":[38],"traditional":[39],"methods":[41],"severely":[43],"constrained":[44],"in":[45,128],"terms":[46],"speed,":[48],"productivity,":[49],"adaptability.":[51],"This":[52],"paper":[53],"summarizes":[54],"most":[56],"modern":[57],"distributed":[58,84,136,178,193],"algorithms,":[60],"organized":[61],"according":[62],"to":[63,68,80],"computing":[65],"platforms":[66],"used":[67],"process":[69],"vast":[70],"volumes":[71],"data.":[73],"The":[74],"purpose":[75],"this":[77,129],"work":[78],"was":[79,160],"offer":[81],"an":[82,176],"optimized":[83,177,192],"strategy":[86],"for":[87,115],"reducing":[88],"algorithm\u2019s":[90],"total":[91],"execution":[92],"time.":[93],"We":[94,184],"obtained,":[95],"preprocessed,":[96],"clinical":[99,143],"SEER":[100,142,166],"on":[102,141],"liver":[103],"cancer,":[104,106],"respiratory":[105],"human":[107],"immunodeficiency":[108],"virus":[109],"(HIV)-related":[110],"lymphoma,":[111],"lung":[113,171],"cancer":[114,150,167,172],"large-scale":[116],"analysis.":[119],"Three":[120],"major":[121],"contributions":[122],"their":[124],"effects":[125],"were":[126,139],"covered":[127],"paper:":[130],"To":[131],"begin,":[132],"three":[133,165],"current":[134],"Pyspark":[135],"algorithms":[138],"evaluated":[140],"using":[145,164],"simulated":[147],"New":[148],"York":[149],"dataset.":[151],"Second,":[152],"systemic":[153],"inflammatory":[154],"response":[155],"syndrome":[156],"(SIRS)":[157],"model":[158],"inference":[159],"done":[161],"described":[163],"datasets.":[168],"Third,":[169],"employing":[170],"data,":[173],"we":[174],"suggested":[175,191],"bisecting":[179],"[Formula:":[180],"see":[181],"text]-means":[182],"method.":[183],"have":[185],"shown":[186],"outcomes":[188],"our":[190],"technique,":[195],"demonstrating":[196],"performance":[198],"enhancement.":[199]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
