{"id":"https://openalex.org/W2048631447","doi":"https://doi.org/10.1142/s0219720004000351","title":"DEVELOPING OPTIMAL PREDICTION MODELS FOR CANCER CLASSIFICATION USING GENE EXPRESSION DATA","display_name":"DEVELOPING OPTIMAL PREDICTION MODELS FOR CANCER CLASSIFICATION USING GENE EXPRESSION DATA","publication_year":2004,"publication_date":"2004-01-01","ids":{"openalex":"https://openalex.org/W2048631447","doi":"https://doi.org/10.1142/s0219720004000351","mag":"2048631447","pmid":"https://pubmed.ncbi.nlm.nih.gov/15290759"},"language":"en","primary_location":{"id":"doi:10.1142/s0219720004000351","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0219720004000351","pdf_url":null,"source":{"id":"https://openalex.org/S155349577","display_name":"Journal of Bioinformatics and Computational Biology","issn_l":"0219-7200","issn":["0219-7200","1757-6334"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311754","host_organization_name":"Imperial College Press","host_organization_lineage":["https://openalex.org/P4310311754"],"host_organization_lineage_names":["Imperial College Press"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Bioinformatics and Computational Biology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036673955","display_name":"Mat Soukup","orcid":null},"institutions":[{"id":"https://openalex.org/I51556381","display_name":"University of Virginia","ror":"https://ror.org/0153tk833","country_code":"US","type":"education","lineage":["https://openalex.org/I51556381"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"MAT SOUKUP","raw_affiliation_strings":["Department of Statistics, University of Virginia, Halsey Hall, Charlottesville, VA 22904-4135, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Statistics, University of Virginia, Halsey Hall, Charlottesville, VA 22904-4135, USA","institution_ids":["https://openalex.org/I51556381"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039418200","display_name":"Jae K. Lee","orcid":"https://orcid.org/0000-0001-7413-9783"},"institutions":[{"id":"https://openalex.org/I51556381","display_name":"University of Virginia","ror":"https://ror.org/0153tk833","country_code":"US","type":"education","lineage":["https://openalex.org/I51556381"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"JAE K. LEE","raw_affiliation_strings":["Division of Biostatistics and Epidemiology, School of Medicine, University of Virginia, Charlottesville, VA 22908, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Division of Biostatistics and Epidemiology, School of Medicine, University of Virginia, Charlottesville, VA 22908, USA","institution_ids":["https://openalex.org/I51556381"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.9647,"has_fulltext":false,"cited_by_count":17,"citation_normalized_percentile":{"value":0.72641865,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"01","issue":"04","first_page":"681","last_page":"694"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10885","display_name":"Gene expression and cancer classification","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10885","display_name":"Gene expression and cancer classification","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11970","display_name":"Molecular Biology Techniques and Applications","score":0.9890999794006348,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10887","display_name":"Bioinformatics and Genomic Networks","score":0.9817000031471252,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.605620265007019},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5631740689277649},{"id":"https://openalex.org/keywords/dna-microarray","display_name":"DNA microarray","score":0.5627400279045105},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.5581696629524231},{"id":"https://openalex.org/keywords/curse-of-dimensionality","display_name":"Curse of dimensionality","score":0.5066614747047424},{"id":"https://openalex.org/keywords/predictive-modelling","display_name":"Predictive modelling","score":0.47099870443344116},{"id":"https://openalex.org/keywords/linear-discriminant-analysis","display_name":"Linear discriminant analysis","score":0.43721187114715576},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.43497687578201294},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.41544392704963684},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.41261476278305054},{"id":"https://openalex.org/keywords/microarray-analysis-techniques","display_name":"Microarray analysis techniques","score":0.4106658101081848},{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.3343472182750702},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3250628113746643},{"id":"https://openalex.org/keywords/gene-expression","display_name":"Gene expression","score":0.2638881206512451},{"id":"https://openalex.org/keywords/gene","display_name":"Gene","score":0.23055598139762878},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.17193260788917542}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.605620265007019},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5631740689277649},{"id":"https://openalex.org/C95371953","wikidata":"https://www.wikidata.org/wiki/Q591745","display_name":"DNA microarray","level":4,"score":0.5627400279045105},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.5581696629524231},{"id":"https://openalex.org/C111030470","wikidata":"https://www.wikidata.org/wiki/Q1430460","display_name":"Curse of dimensionality","level":2,"score":0.5066614747047424},{"id":"https://openalex.org/C45804977","wikidata":"https://www.wikidata.org/wiki/Q7239673","display_name":"Predictive modelling","level":2,"score":0.47099870443344116},{"id":"https://openalex.org/C69738355","wikidata":"https://www.wikidata.org/wiki/Q1228929","display_name":"Linear discriminant analysis","level":2,"score":0.43721187114715576},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43497687578201294},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.41544392704963684},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.41261476278305054},{"id":"https://openalex.org/C8415881","wikidata":"https://www.wikidata.org/wiki/Q6839217","display_name":"Microarray analysis techniques","level":4,"score":0.4106658101081848},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.3343472182750702},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3250628113746643},{"id":"https://openalex.org/C150194340","wikidata":"https://www.wikidata.org/wiki/Q26972","display_name":"Gene expression","level":3,"score":0.2638881206512451},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.23055598139762878},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.17193260788917542},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[{"descriptor_ui":"D003110","descriptor_name":"Colonic Neoplasms","qualifier_ui":"Q000145","qualifier_name":"classification","is_major_topic":false},{"descriptor_ui":"D003110","descriptor_name":"Colonic Neoplasms","qualifier_ui":"Q000145","qualifier_name":"classification","is_major_topic":false},{"descriptor_ui":"D003110","descriptor_name":"Colonic Neoplasms","qualifier_ui":"Q000145","qualifier_name":"classification","is_major_topic":false},{"descriptor_ui":"D003110","descriptor_name":"Colonic Neoplasms","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D003110","descriptor_name":"Colonic Neoplasms","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D003110","descriptor_name":"Colonic Neoplasms","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D003627","descriptor_name":"Data Interpretation, Statistical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D003627","descriptor_name":"Data Interpretation, Statistical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D003627","descriptor_name":"Data Interpretation, Statistical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D007938","descriptor_name":"Leukemia","qualifier_ui":"Q000145","qualifier_name":"classification","is_major_topic":false},{"descriptor_ui":"D007938","descriptor_name":"Leukemia","qualifier_ui":"Q000145","qualifier_name":"classification","is_major_topic":false},{"descriptor_ui":"D007938","descriptor_name":"Leukemia","qualifier_ui":"Q000145","qualifier_name":"classification","is_major_topic":false},{"descriptor_ui":"D007938","descriptor_name":"Leukemia","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D007938","descriptor_name":"Leukemia","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D007938","descriptor_name":"Leukemia","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D009369","descriptor_name":"Neoplasms","qualifier_ui":"Q000145","qualifier_name":"classification","is_major_topic":false},{"descriptor_ui":"D009369","descriptor_name":"Neoplasms","qualifier_ui":"Q000145","qualifier_name":"classification","is_major_topic":false},{"descriptor_ui":"D009369","descriptor_name":"Neoplasms","qualifier_ui":"Q000145","qualifier_name":"classification","is_major_topic":false},{"descriptor_ui":"D009369","descriptor_name":"Neoplasms","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D009369","descriptor_name":"Neoplasms","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D009369","descriptor_name":"Neoplasms","qualifier_ui":"Q000235","qualifier_name":"genetics","is_major_topic":false},{"descriptor_ui":"D015233","descriptor_name":"Models, Statistical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D015233","descriptor_name":"Models, Statistical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D015233","descriptor_name":"Models, Statistical","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016002","descriptor_name":"Discriminant Analysis","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016002","descriptor_name":"Discriminant Analysis","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016002","descriptor_name":"Discriminant Analysis","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D019295","descriptor_name":"Computational Biology","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D020411","descriptor_name":"Oligonucleotide Array Sequence Analysis","qualifier_ui":"Q000706","qualifier_name":"statistics & numerical data","is_major_topic":false},{"descriptor_ui":"D020411","descriptor_name":"Oligonucleotide Array Sequence Analysis","qualifier_ui":"Q000706","qualifier_name":"statistics & numerical data","is_major_topic":false},{"descriptor_ui":"D020411","descriptor_name":"Oligonucleotide Array Sequence Analysis","qualifier_ui":"Q000706","qualifier_name":"statistics & numerical data","is_major_topic":false},{"descriptor_ui":"D020869","descriptor_name":"Gene Expression Profiling","qualifier_ui":"Q000706","qualifier_name":"statistics & numerical data","is_major_topic":false},{"descriptor_ui":"D020869","descriptor_name":"Gene Expression Profiling","qualifier_ui":"Q000706","qualifier_name":"statistics & numerical data","is_major_topic":false},{"descriptor_ui":"D020869","descriptor_name":"Gene Expression Profiling","qualifier_ui":"Q000706","qualifier_name":"statistics & numerical data","is_major_topic":false},{"descriptor_ui":"D030541","descriptor_name":"Databases, Genetic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D030541","descriptor_name":"Databases, Genetic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D030541","descriptor_name":"Databases, Genetic","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false}],"locations_count":2,"locations":[{"id":"doi:10.1142/s0219720004000351","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0219720004000351","pdf_url":null,"source":{"id":"https://openalex.org/S155349577","display_name":"Journal of Bioinformatics and Computational Biology","issn_l":"0219-7200","issn":["0219-7200","1757-6334"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311754","host_organization_name":"Imperial College Press","host_organization_lineage":["https://openalex.org/P4310311754"],"host_organization_lineage_names":["Imperial College Press"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Bioinformatics and Computational Biology","raw_type":"journal-article"},{"id":"pmid:15290759","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/15290759","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of bioinformatics and computational biology","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6600000262260437,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306095","display_name":"American Cancer Society","ror":"https://ror.org/02e463172"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W1800261609","https://openalex.org/W1962281270","https://openalex.org/W1966701961","https://openalex.org/W2032265042","https://openalex.org/W2038437697","https://openalex.org/W2046723594","https://openalex.org/W2057331441","https://openalex.org/W2087684630","https://openalex.org/W2108728387","https://openalex.org/W2109363337","https://openalex.org/W2127544153","https://openalex.org/W2133199783","https://openalex.org/W2138550913","https://openalex.org/W2168561598","https://openalex.org/W2795528501","https://openalex.org/W3175417087"],"related_works":["https://openalex.org/W1986892844","https://openalex.org/W170658859","https://openalex.org/W2490137852","https://openalex.org/W3023720143","https://openalex.org/W4310366902","https://openalex.org/W3141198878","https://openalex.org/W2056932428","https://openalex.org/W1483316219","https://openalex.org/W2412120523","https://openalex.org/W2107563406"],"abstract_inverted_index":{"Microarrays":[0],"can":[1,211],"provide":[2],"genome-wide":[3],"expression":[4,23,81],"patterns":[5],"for":[6,10,49,62,75,115,161,181],"various":[7],"cancers,":[8],"especially":[9,180],"tumor":[11,32],"sub-types":[12,33,78],"that":[13,150,165],"may":[14],"exhibit":[15],"substantially":[16],"different":[17],"patient":[18],"prognosis.":[19],"Using":[20],"such":[21],"gene":[22,80,173,193],"data,":[24],"several":[25],"approaches":[26],"have":[27,148],"been":[28],"proposed":[29,121],"to":[30,58,68,110,123,184],"classify":[31],"accurately.":[34],"These":[35],"classification":[36],"methods":[37,57,122,186],"are":[38,102],"not":[39],"robust,":[40],"and":[41,138,164,199,206],"often":[42],"dependent":[43],"on":[44,188],"a":[45,63,88,112],"particular":[46],"training":[47],"sample":[48,107],"modelling,":[50],"which":[51],"raises":[52],"issues":[53],"in":[54,87,204],"utilizing":[55],"these":[56,162],"administer":[59],"proper":[60],"treatment":[61],"future":[64,116],"patient.":[65],"We":[66,118,147],"propose":[67],"construct":[69],"an":[70,105],"optimal,":[71],"robust":[72,113],"prediction":[73,156,167],"model":[74,84,114],"classifying":[76],"cancer":[77,141],"using":[79],"data.":[82,117],"Our":[83],"is":[85,158,197],"constructed":[86],"step-wise":[89],"fashion":[90],"implementing":[91],"cross-validated":[92],"quadratic":[93],"discriminant":[94],"analysis.":[95],"At":[96],"each":[97],"step,":[98],"all":[99],"identified":[100],"models":[101,157,168],"validated":[103],"by":[104,134,143,201],"independent":[106,182],"of":[108,128,153],"patients":[109],"develop":[111],"apply":[119],"the":[120,130,139,151,202],"two":[124,172],"microarray":[125],"data":[126,133,142],"sets":[127],"cancer:":[129],"acute":[131],"leukemia":[132],"Golub":[135],"et":[136,145],"al.":[137,146],"colon":[140],"Alon":[144],"found":[149],"dimensionality":[152],"our":[154,166],"optimal":[155],"relatively":[159],"small":[160],"cases":[163],"with":[169],"one":[170],"or":[171,176,190],"factors":[174],"outperforms":[175],"has":[177],"competing":[178],"performance,":[179],"samples,":[183],"other":[185],"based":[187],"50":[189],"more":[191],"predictive":[192],"factors.":[194],"The":[195,208],"methodology":[196],"implemented":[198],"developed":[200],"procedures":[203],"R":[205],"Splus.":[207],"source":[209],"code":[210],"be":[212],"obtained":[213],"at":[214],"http://hesweb1.med.virginia.edu/bioinformatics.":[215]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2014,"cited_by_count":1},{"year":2012,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
