{"id":"https://openalex.org/W2577522781","doi":"https://doi.org/10.1090/dimacs/070/03","title":"Biostatistical challenges in molecular data analysis","display_name":"Biostatistical challenges in molecular data analysis","publication_year":2007,"publication_date":"2007-06-07","ids":{"openalex":"https://openalex.org/W2577522781","doi":"https://doi.org/10.1090/dimacs/070/03","mag":"2577522781"},"language":"en","primary_location":{"id":"doi:10.1090/dimacs/070/03","is_oa":false,"landing_page_url":"https://doi.org/10.1090/dimacs/070/03","pdf_url":null,"source":{"id":"https://openalex.org/S4210177973","display_name":"DIMACS series in discrete mathematics and theoretical computer science","issn_l":"1052-1798","issn":["1052-1798","2472-4793"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"book series"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"DIMACS Series in Discrete Mathematics and\n                        Theoretical Computer Science","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109548095","display_name":"William D. Shannon","orcid":null},"institutions":[{"id":"https://openalex.org/I204465549","display_name":"Washington University in St. Louis","ror":"https://ror.org/01yc7t268","country_code":"US","type":"education","lineage":["https://openalex.org/I204465549"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"William Shannon","raw_affiliation_strings":["Washington University in St. Louis,"],"affiliations":[{"raw_affiliation_string":"Washington University in St. Louis,","institution_ids":["https://openalex.org/I204465549"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5109548095"],"corresponding_institution_ids":["https://openalex.org/I204465549"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.2693662,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"63","last_page":"71"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10261","display_name":"Genetic Associations and Epidemiology","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10261","display_name":"Genetic Associations and Epidemiology","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10885","display_name":"Gene expression and cancer classification","score":0.9929999709129333,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10887","display_name":"Bioinformatics and Genomic Networks","score":0.9824000000953674,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4500772953033447},{"id":"https://openalex.org/keywords/computational-biology","display_name":"Computational biology","score":0.3971033990383148},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.1408710479736328}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4500772953033447},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.3971033990383148},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.1408710479736328}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1090/dimacs/070/03","is_oa":false,"landing_page_url":"https://doi.org/10.1090/dimacs/070/03","pdf_url":null,"source":{"id":"https://openalex.org/S4210177973","display_name":"DIMACS series in discrete mathematics and theoretical computer science","issn_l":"1052-1798","issn":["1052-1798","2472-4793"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"book series"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"DIMACS Series in Discrete Mathematics and\n                        Theoretical Computer Science","raw_type":"book-chapter"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/3","score":0.8399999737739563,"display_name":"Good health and well-being"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":5,"referenced_works":["https://openalex.org/W1980247209","https://openalex.org/W2020861455","https://openalex.org/W2031842395","https://openalex.org/W2043192290","https://openalex.org/W2156602023"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2382290278","https://openalex.org/W2350741829","https://openalex.org/W2130043461","https://openalex.org/W2530322880","https://openalex.org/W1596801655"],"abstract_inverted_index":{"Epidemiology":[0,257,258],"is":[1,61,105,133,175,195,211,468,471,480,493,515],"the":[2,5,22,29,33,41,57,96,117,121,134,147,176,196,212,228,263,266,283,290,294,302,326,341,408,420,439,445,454,459,462,465,475,481,486,490,494,499,503,510,520,527],"study":[3],"of":[4,9,24,38,46,67,84,86,98,100,102,112,119,125,137,150,179,186,198,205,214,230,265,270,285,299,307,325,340,392,411,422,435,461,483,496,519,522],"distribution":[6,267],"and":[7,26,36,44,60,64,90,114,238,253,268,287,297,305,315,317,338,362,402,441,458,488,501],"size":[8,269,339],"disease":[10,47,271,313,342,446,487,500,525],"problems":[11,165,223,272],"in":[12,15,21,130,227,273,276,282,331,448,464,474],"human":[13,138,274],"populations,":[14,275],"particular":[16,277,417],"to":[17,27,55,75,146,219,278,288,321,377,413,453],"identify":[18,56,279,322],"etiological":[19,58,280,323,368],"factors":[20,217,281,351,370],"pathogenesis":[23,284],"diseases":[25,286,327],"provide":[28,289],"data":[30,76,291,405],"essential":[31,292],"for":[32,40,293,301,403],"management,":[34,295],"evaluation":[35,296],"planning":[37,298],"services":[39,300],"prevention,":[42,303],"control":[43,304],"treatment":[45,306],"(Everitt,":[48,309],"1998).":[49,310],"Molecular":[50,70],"epidemiology":[51,71,132,170,394],"uses":[52],"molecular":[53,79,87,131,169,206,216],"biology":[54,80],"factors,":[59],"a":[62,202,416,424,427,433,516],"growing":[63],"important":[65,215],"area":[66],"biomedical":[68],"research.":[69],"presents":[72],"new":[73],"challenges":[74],"analysts.":[77],"Modern":[78],"can":[81,224],"measure":[82,95,518],"tens":[83,99],"thousands":[85,101,111],"variables":[88,151],"rapidly":[89],"cheaply":[91],"(e.g.,":[92],"gene":[93],"chips":[94],"activity":[97],"genes,":[103],"genotyping":[104],"routinely":[106],"done":[107],"at":[108],"hundreds":[109],"or":[110],"markers,":[113],"proteomics":[115],"has":[116,259],"potential":[118],"characterizing":[120],"entire":[122],"protein":[123],"content":[124],"tissues).":[126],"The":[127,172,192,208,388],"limiting":[128],"step":[129],"small":[135,155],"number":[136,149,204,482,495],"subjects":[139],"these":[140,222],"measurements":[141],"are":[142,456],"made":[143],"on":[144],"relative":[145],"large":[148,153,203],"(the":[152],"P,":[154],"N":[156],"problem).":[157],"In":[158],"this":[159,319,507],"paper":[160],"I":[161,242],"address":[162],"three":[163],"statistical":[164,184,390,517],"faced":[166],"when":[167],"analyzing":[168],"data.":[171],"first":[173],"problem":[174,194,210],"proper":[177],"identification":[178],"patient":[180],"subgroups":[181],"within":[182],"which":[183,514],"tests":[185],"genotype-phenotype":[187],"association":[188,521],"should":[189],"be":[190,225],"applied.":[191],"second":[193],"testing":[197],"clinical":[199],"covariates":[200],"against":[201],"variables.":[207],"third":[209],"selection":[213],"related":[218],"disease.":[220,425],"While":[221],"defined":[226,261],"language":[229],"classical":[231],"statistics":[232,249],"(i.e.,":[233],"population":[234],"stratification,":[235],"over-determined":[236],"systems,":[237],"variable":[239],"selection,":[240],"respectively),":[241],"believe":[243],"their":[244,378],"solution":[245],"will":[246],"require":[247],"combining":[248],"with":[250,335,353,438,485,489,498,524],"discrete":[251],"mathematics":[252],"computer":[254],"science.":[255],"1.":[256],"been":[260,396],"as":[262],"\u201cstudy":[264],"disease\u201d":[308],"By":[311],"quantifying":[312],"occurrence":[314],"location,":[316],"using":[318],"information":[320],"causes":[324],"by":[328,526],"observing":[329],"differences":[330],"exposures":[332],"among":[333],"areas":[334],"different":[336],"distributions":[337],"problem,":[343],"modern":[344,385,393],"epidemiologists":[345],"have":[346,365,395],"successfully":[347],"identified":[348,383],"serious":[349],"risk":[350,369],"associated":[352],"many":[354],"diseases.":[355],"For":[356],"example,":[357],"heart":[358],"disease,":[359],"skin":[360],"cancer,":[361],"lung":[363],"cancer":[364],"very":[366],"clear":[367],"\u2013":[371,375],"obesity,":[372],"sun,":[373],"smoking":[374],"contributing":[376],"occurrence.":[379],"These":[380],"were":[381],"all":[382,449],"through":[384],"epidemiological":[386],"studies.":[387],"quantitative":[389],"tools":[391],"well":[397],"worked":[398],"out":[399],"both":[400],"theoretically":[401],"applied":[404],"analysis.":[406],"Consider":[407],"simple":[409,528],"case":[410],"trying":[412],"decide":[414],"whether":[415],"exposure":[418,463,523],"increases":[419],"odds":[421,511],"having":[423],"Under":[426],"suitable":[428],"controlled":[429],"(and":[430],"defined)":[431],"experiment":[432],"set":[434],"cases":[436],"(those":[437,443],"disease)":[440],"controls":[442],"without":[444,502],"but":[447],"other":[450],"senses":[451],"similar":[452],"cases)":[455],"obtained":[457],"frequency":[460],"two":[466],"groups":[467],"compared.":[469],"This":[470],"easily":[472],"displayed":[473],"adjacent":[476],"Figure":[477],"where":[478],"NYY":[479],"people":[484,497],"exposure,":[491,504],"NYN":[492],"etc.":[505],"From":[506],"we":[508],"calculate":[509],"ratio":[512],"(OR)":[513],"formula:":[529],"Exposure":[530]},"counts_by_year":[],"updated_date":"2026-03-17T06:59:57.516163","created_date":"2025-10-10T00:00:00"}
