{"id":"https://openalex.org/W7124180232","doi":"https://doi.org/10.1145/3777577.3777608","title":"Combining biostatistical methods to improve the accuracy of gene expression data analysis","display_name":"Combining biostatistical methods to improve the accuracy of gene expression data analysis","publication_year":2025,"publication_date":"2025-10-24","ids":{"openalex":"https://openalex.org/W7124180232","doi":"https://doi.org/10.1145/3777577.3777608"},"language":null,"primary_location":{"id":"doi:10.1145/3777577.3777608","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3777577.3777608","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 6th International Symposium on Artificial Intelligence for Medical Sciences","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3777577.3777608","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5123053540","display_name":"Zhouzi Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I66867065","display_name":"East China Normal University","ror":"https://ror.org/02n96ep67","country_code":"CN","type":"education","lineage":["https://openalex.org/I66867065"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhouzi Xu","raw_affiliation_strings":["East China Normal University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"East China Normal University, Shanghai, China","institution_ids":["https://openalex.org/I66867065"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5123053540"],"corresponding_institution_ids":["https://openalex.org/I66867065"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.66712166,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"189","last_page":"194"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10885","display_name":"Gene expression and cancer classification","score":0.9563000202178955,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10885","display_name":"Gene expression and cancer classification","score":0.9563000202178955,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10062","display_name":"MicroRNA in disease regulation","score":0.006000000052154064,"subfield":{"id":"https://openalex.org/subfields/1306","display_name":"Cancer Research"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11289","display_name":"Single-cell and spatial transcriptomics","score":0.0024999999441206455,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/dimensionality-reduction","display_name":"Dimensionality reduction","score":0.7333999872207642},{"id":"https://openalex.org/keywords/principal-component-analysis","display_name":"Principal component analysis","score":0.6470000147819519},{"id":"https://openalex.org/keywords/hyperparameter","display_name":"Hyperparameter","score":0.5960000157356262},{"id":"https://openalex.org/keywords/curse-of-dimensionality","display_name":"Curse of dimensionality","score":0.5185999870300293},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5095000267028809},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.5041000247001648},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.40709999203681946},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.37279999256134033},{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.3059000074863434}],"concepts":[{"id":"https://openalex.org/C70518039","wikidata":"https://www.wikidata.org/wiki/Q16000077","display_name":"Dimensionality reduction","level":2,"score":0.7333999872207642},{"id":"https://openalex.org/C27438332","wikidata":"https://www.wikidata.org/wiki/Q2873","display_name":"Principal component analysis","level":2,"score":0.6470000147819519},{"id":"https://openalex.org/C8642999","wikidata":"https://www.wikidata.org/wiki/Q4171168","display_name":"Hyperparameter","level":2,"score":0.5960000157356262},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5885999798774719},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5637999773025513},{"id":"https://openalex.org/C111030470","wikidata":"https://www.wikidata.org/wiki/Q1430460","display_name":"Curse of dimensionality","level":2,"score":0.5185999870300293},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5095000267028809},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.5041000247001648},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.40709999203681946},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.37279999256134033},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3723999857902527},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3700000047683716},{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.3059000074863434},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.29980000853538513},{"id":"https://openalex.org/C90559484","wikidata":"https://www.wikidata.org/wiki/Q778379","display_name":"Expression (computer science)","level":2,"score":0.29679998755455017},{"id":"https://openalex.org/C69738355","wikidata":"https://www.wikidata.org/wiki/Q1228929","display_name":"Linear discriminant analysis","level":2,"score":0.28679999709129333},{"id":"https://openalex.org/C122280245","wikidata":"https://www.wikidata.org/wiki/Q620622","display_name":"Kernel method","level":3,"score":0.2840000092983246},{"id":"https://openalex.org/C2780091087","wikidata":"https://www.wikidata.org/wiki/Q3092131","display_name":"Dimensional reduction","level":2,"score":0.27379998564720154},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.27309998869895935},{"id":"https://openalex.org/C12426560","wikidata":"https://www.wikidata.org/wiki/Q189569","display_name":"Basis (linear algebra)","level":2,"score":0.27160000801086426},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.26899999380111694},{"id":"https://openalex.org/C182335926","wikidata":"https://www.wikidata.org/wiki/Q17093020","display_name":"Kernel principal component analysis","level":4,"score":0.2644999921321869},{"id":"https://openalex.org/C10485038","wikidata":"https://www.wikidata.org/wiki/Q48996162","display_name":"Hyperparameter optimization","level":3,"score":0.25529998540878296},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.25290000438690186},{"id":"https://openalex.org/C21200559","wikidata":"https://www.wikidata.org/wiki/Q7451068","display_name":"Sensitivity (control systems)","level":2,"score":0.25110000371932983},{"id":"https://openalex.org/C184509293","wikidata":"https://www.wikidata.org/wiki/Q5136711","display_name":"Clustering high-dimensional data","level":3,"score":0.25110000371932983},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.25060001015663147}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3777577.3777608","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3777577.3777608","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 6th International Symposium on Artificial Intelligence for Medical Sciences","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3777577.3777608","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3777577.3777608","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 6th International Symposium on Artificial Intelligence for Medical Sciences","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W3179942648","https://openalex.org/W4212947836","https://openalex.org/W4377093804","https://openalex.org/W4385344498","https://openalex.org/W4390187248","https://openalex.org/W4391512455","https://openalex.org/W4396889307","https://openalex.org/W4403088623"],"related_works":[],"abstract_inverted_index":{"Gene":[0],"expression":[1,171,280],"data":[2,123,231],"contains":[3],"rich":[4],"disease":[5,289],"information,":[6,127],"but":[7],"its":[8],"high":[9,160,272],"dimensionality,":[10],"redundancy,":[11],"and":[12,25,36,51,64,87,109,124,140,152,184,189,210,232,245,267,274],"nonlinear":[13,102],"relationships":[14],"pose":[15],"performance":[16,253],"bottlenecks":[17],"for":[18,287],"traditional":[19],"analysis":[20,55,235],"methods":[21,148],"in":[22,33,165,276],"cancer":[23,223,278],"prediction":[24],"classification.":[26],"Therefore,":[27],"this":[28,199],"study":[29,105],"introduces":[30],"biostatistical":[31],"improvements":[32],"dimensionality":[34,62,143],"reduction":[35,63,144],"modeling.":[37],"A":[38],"weighted":[39,110,130],"matrix":[40,111,131],"is":[41,57,80,92],"used":[42],"to":[43,59,66,96,100],"enhance":[44,97,141],"the":[45,68,98,118,122,129,134,142,156,177,219,238],"contribution":[46,135],"of":[47,70,121,136,158,196,217,243,248,255,264],"highly":[48,137],"variable":[49,138],"genes,":[50],"hierarchical":[52,107,265],"principal":[53,119],"component":[54],"(PCA)":[56],"combined":[58,262],"achieve":[60],"multi-level":[61],"denoising":[65],"mitigate":[67],"\"curse":[69],"dimensionality.\"":[71],"For":[72],"classification":[73],"prediction,":[74],"a":[75,83,241,252,283],"support":[76,268],"vector":[77,269],"machine-based":[78],"model":[79,179,200,220,239],"constructed,":[81],"incorporating":[82],"radial":[84],"basis":[85],"kernel":[86],"regularization":[88],"term.":[89],"Hyperparameter":[90],"configuration":[91],"optimized":[93],"through":[94],"cross-validation":[95],"ability":[99],"characterize":[101],"patterns.":[103],"This":[104],"selects":[106],"PCA":[108,114,266],"methods.":[112],"Since":[113],"can":[115,132],"effectively":[116],"extract":[117],"components":[120],"reduce":[125],"redundant":[126],"while":[128,250],"highlight":[133],"genes":[139],"effect.":[145],"In":[146],"contrast,":[147],"such":[149,204],"as":[150,205],"autoencoders":[151],"t-SNE":[153],"may":[154],"face":[155],"disadvantages":[157],"excessively":[159],"computational":[161],"complexity":[162],"or":[163],"difficulty":[164],"interpretation":[166],"when":[167],"dealing":[168],"with":[169,192],"gene":[170,279],"data.":[172],"Experimental":[173],"results":[174],"demonstrate":[175],"that":[176,237],"SVM-PCA":[178],"achieves":[180,271],"outstanding":[181],"accuracy,":[182],"recall,":[183],"precision,":[185],"reaching":[186],"92.4%,":[187],"93.1%,":[188],"91.2%,":[190],"respectively,":[191],"an":[193,215,246],"F1":[194],"score":[195],"92.1%.":[197],"Overall,":[198],"outperforms":[201],"control":[202],"models":[203],"random":[206],"forests,":[207],"decision":[208],"trees,":[209],"K-nearest":[211],"neighbors.":[212],"Furthermore,":[213],"achieving":[214],"AUC":[216],"94.5%,":[218],"consistently":[221],"distinguishes":[222],"from":[224],"normal":[225],"samples":[226],"at":[227],"various":[228],"thresholds.":[229],"Imbalanced":[230],"hyperparameter":[233],"sensitivity":[234],"showed":[236],"achieved":[240],"specificity":[242],"94.8%":[244],"MCC":[247],"0.88,":[249],"maintaining":[251],"stability":[254],"97.6%":[256],"under":[257],"\u00b115%":[258],"parameter":[259],"perturbations.":[260],"The":[261],"strategy":[263],"machines":[270],"accuracy":[273],"robustness":[275],"analyzing":[277],"data,":[281],"providing":[282],"reliable":[284],"technical":[285],"path":[286],"complex":[288],"prediction.":[290]},"counts_by_year":[],"updated_date":"2026-01-15T23:21:31.212559","created_date":"2026-01-15T00:00:00"}
