{"id":"https://openalex.org/W2888155479","doi":"https://doi.org/10.1145/3233547.3233687","title":"Optimal Clustering with Missing Values","display_name":"Optimal Clustering with Missing Values","publication_year":2018,"publication_date":"2018-08-15","ids":{"openalex":"https://openalex.org/W2888155479","doi":"https://doi.org/10.1145/3233547.3233687","mag":"2888155479"},"language":"en","primary_location":{"id":"doi:10.1145/3233547.3233687","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3233547.3233687","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3233547.3233687","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2018 ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3233547.3233687","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5046509031","display_name":"Shahin Boluki","orcid":"https://orcid.org/0000-0002-5015-8995"},"institutions":[{"id":"https://openalex.org/I91045830","display_name":"Texas A&M University","ror":"https://ror.org/01f5ytq51","country_code":"US","type":"education","lineage":["https://openalex.org/I91045830"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Shahin Boluki","raw_affiliation_strings":["Texas A&amp;M University, College Station, TX, USA"],"affiliations":[{"raw_affiliation_string":"Texas A&amp;M University, College Station, TX, USA","institution_ids":["https://openalex.org/I91045830"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066234514","display_name":"Siamak Zamani Dadaneh","orcid":"https://orcid.org/0000-0001-8808-8622"},"institutions":[{"id":"https://openalex.org/I91045830","display_name":"Texas A&M University","ror":"https://ror.org/01f5ytq51","country_code":"US","type":"education","lineage":["https://openalex.org/I91045830"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Siamak Zamani Dadaneh","raw_affiliation_strings":["Texas A&amp;M University, College Station, TX, USA"],"affiliations":[{"raw_affiliation_string":"Texas A&amp;M University, College Station, TX, USA","institution_ids":["https://openalex.org/I91045830"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073946580","display_name":"Xiaoning Qian","orcid":"https://orcid.org/0000-0002-4347-2476"},"institutions":[{"id":"https://openalex.org/I91045830","display_name":"Texas A&M University","ror":"https://ror.org/01f5ytq51","country_code":"US","type":"education","lineage":["https://openalex.org/I91045830"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiaoning Qian","raw_affiliation_strings":["Texas A&amp;M University, College Station, TX, USA"],"affiliations":[{"raw_affiliation_string":"Texas A&amp;M University, College Station, TX, USA","institution_ids":["https://openalex.org/I91045830"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5112221423","display_name":"Edward R. Dougherty","orcid":null},"institutions":[{"id":"https://openalex.org/I91045830","display_name":"Texas A&M University","ror":"https://ror.org/01f5ytq51","country_code":"US","type":"education","lineage":["https://openalex.org/I91045830"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Edward R. Dougherty","raw_affiliation_strings":["Texas A&amp;M University, College Station, TX, USA"],"affiliations":[{"raw_affiliation_string":"Texas A&amp;M University, College Station, TX, USA","institution_ids":["https://openalex.org/I91045830"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5046509031"],"corresponding_institution_ids":["https://openalex.org/I91045830"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.07630153,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"593","last_page":"594"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10885","display_name":"Gene expression and cancer classification","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10885","display_name":"Gene expression and cancer classification","score":0.9957000017166138,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11901","display_name":"Bayesian Methods and Mixture Models","score":0.9897000193595886,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10136","display_name":"Statistical Methods and Inference","score":0.9803000092506409,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.8106282949447632},{"id":"https://openalex.org/keywords/missing-data","display_name":"Missing data","score":0.7880406379699707},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.6802833676338196},{"id":"https://openalex.org/keywords/imputation","display_name":"Imputation (statistics)","score":0.658861517906189},{"id":"https://openalex.org/keywords/fuzzy-clustering","display_name":"Fuzzy clustering","score":0.5945772528648376},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.592038631439209},{"id":"https://openalex.org/keywords/correlation-clustering","display_name":"Correlation clustering","score":0.5018463134765625},{"id":"https://openalex.org/keywords/cure-data-clustering-algorithm","display_name":"CURE data clustering algorithm","score":0.45719826221466064},{"id":"https://openalex.org/keywords/hierarchical-clustering","display_name":"Hierarchical clustering","score":0.4376249313354492},{"id":"https://openalex.org/keywords/consensus-clustering","display_name":"Consensus clustering","score":0.4237840175628662},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.38861283659935},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.34139037132263184},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.28431379795074463}],"concepts":[{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.8106282949447632},{"id":"https://openalex.org/C9357733","wikidata":"https://www.wikidata.org/wiki/Q6878417","display_name":"Missing data","level":2,"score":0.7880406379699707},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.6802833676338196},{"id":"https://openalex.org/C58041806","wikidata":"https://www.wikidata.org/wiki/Q1660484","display_name":"Imputation (statistics)","level":3,"score":0.658861517906189},{"id":"https://openalex.org/C17212007","wikidata":"https://www.wikidata.org/wiki/Q5511111","display_name":"Fuzzy clustering","level":3,"score":0.5945772528648376},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.592038631439209},{"id":"https://openalex.org/C94641424","wikidata":"https://www.wikidata.org/wiki/Q5172845","display_name":"Correlation clustering","level":3,"score":0.5018463134765625},{"id":"https://openalex.org/C33704608","wikidata":"https://www.wikidata.org/wiki/Q5014717","display_name":"CURE data clustering algorithm","level":4,"score":0.45719826221466064},{"id":"https://openalex.org/C92835128","wikidata":"https://www.wikidata.org/wiki/Q1277447","display_name":"Hierarchical clustering","level":3,"score":0.4376249313354492},{"id":"https://openalex.org/C186767784","wikidata":"https://www.wikidata.org/wiki/Q5162841","display_name":"Consensus clustering","level":5,"score":0.4237840175628662},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38861283659935},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.34139037132263184},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.28431379795074463}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3233547.3233687","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3233547.3233687","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3233547.3233687","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2018 ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3233547.3233687","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3233547.3233687","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3233547.3233687","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2018 ACM International Conference on Bioinformatics, Computational Biology, and Health Informatics","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.7099999785423279,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[{"id":"https://openalex.org/G3936902016","display_name":null,"funder_award_id":"1553281","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2888155479.pdf","grobid_xml":"https://content.openalex.org/works/W2888155479.grobid-xml"},"referenced_works_count":9,"referenced_works":["https://openalex.org/W2025183726","https://openalex.org/W2146614932","https://openalex.org/W2158111773","https://openalex.org/W2179438025","https://openalex.org/W2245213139","https://openalex.org/W2774435077","https://openalex.org/W2778292275","https://openalex.org/W2799330337","https://openalex.org/W2964335858"],"related_works":["https://openalex.org/W2384052049","https://openalex.org/W2367205823","https://openalex.org/W2087424554","https://openalex.org/W2406185607","https://openalex.org/W4220814143","https://openalex.org/W2131625050","https://openalex.org/W1574003721","https://openalex.org/W2311450085","https://openalex.org/W1981213098","https://openalex.org/W2139280638"],"abstract_inverted_index":{"Missing":[0,23],"values":[1,24,50,97],"frequently":[2],"arise":[3],"in":[4,51,92,98,236,259],"modern":[5],"biomedical":[6,260],"studies":[7,164],"due":[8],"to":[9,35,57,112,185,232],"various":[10,186,233],"reasons,":[11],"including":[12,189],"missing":[13,49,61,84,96,182,216,240,257],"tests":[14],"or":[15],"complex":[16],"profiling":[17],"technologies":[18],"for":[19,46,152,212,222,254],"different":[20,239],"omics":[21],"measurements.":[22],"can":[25],"complicate":[26],"the":[27,52,60,66,70,81,99,123,129,138,142,149,153,173,177,197,215,220,226,247],"application":[28],"of":[29,54,75,83,101,132,176,225],"clustering":[30,55,67,108,134,180,187,205,234],"algorithms,":[31],"whose":[32],"goals":[33],"are":[34],"group":[36],"points":[37],"based":[38,201],"on":[39,69,80,165],"some":[40],"similarity":[41],"criterion.":[42],"A":[43],"common":[44],"practice":[45],"dealing":[47,213,255],"with":[48,110,157,181,196,214,238,256],"context":[53,100],"is":[56,88],"first":[58],"impute":[59],"values,":[62,183],"and":[63,168,193,209,242],"then":[64],"apply":[65],"algorithm":[68],"completed":[71],"data.":[72,227],"The":[73],"performance":[74,175,230],"such":[76],"methods,":[77],"however,":[78],"depends":[79],"knowledge":[82],"value":[85,217],"mechanism,":[86],"which":[87,104],"rarely":[89],"fully":[90],"achievable":[91],"practice.":[93],"We":[94,120],"consider":[95],"optimal":[102,107,133,179,248],"clustering,":[103,195],"finds":[105],"an":[106,113,158],"operator":[109],"reference":[111],"underlying":[114],"random":[115],"labeled":[116],"point":[117],"process":[118,140],"(RLPP).":[119],"present":[121],"how":[122],"missing-value":[124,139],"problem":[125],"fits":[126],"neatly":[127],"into":[128],"overall":[130],"framework":[131,151,211],"by":[135],"marginalizing":[136],"out":[137],"from":[141],"feature":[143],"distribution.":[144],"In":[145],"particular,":[146],"we":[147],"demonstrate":[148],"proposed":[150,178],"multivariate":[154],"Gaussian":[155],"model":[156],"arbitrary":[159],"covariance":[160],"structure.":[161],"Comprehensive":[162],"experimental":[163],"both":[166],"synthetic":[167],"real-world":[169],"RNA-seq":[170],"data":[171,258],"shows":[172],"superior":[174,229],"compared":[184,231],"approaches,":[188],"k-means,":[190],"fuzzy":[191],"c-means":[192],"hierarchical":[194],"off-the-shelf":[198],"Gibbs":[199],"sampling":[200],"imputation":[202],"method.":[203],"Optimal":[204],"offers":[206],"a":[207,251],"robust":[208],"flexible":[210],"problem,":[218],"obviating":[219],"need":[221],"imputation-based":[223],"pre-processing":[224],"Its":[228],"methods":[235],"settings":[237],"rates":[241],"small":[243],"sample":[244],"sizes,":[245],"demonstrates":[246],"clusterer":[249],"as":[250],"promising":[252],"tool":[253],"applications.":[261]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
