{"id":"https://openalex.org/W4401863432","doi":"https://doi.org/10.1145/3637528.3672194","title":"Empower an End-to-end Scalable and Interpretable Data Science Ecosystem using Statistics, AI and Domain Science","display_name":"Empower an End-to-end Scalable and Interpretable Data Science Ecosystem using Statistics, AI and Domain Science","publication_year":2024,"publication_date":"2024-08-24","ids":{"openalex":"https://openalex.org/W4401863432","doi":"https://doi.org/10.1145/3637528.3672194"},"language":"en","primary_location":{"id":"doi:10.1145/3637528.3672194","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3637528.3672194","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030660111","display_name":"Xihong Lin","orcid":"https://orcid.org/0000-0001-7067-7752"},"institutions":[{"id":"https://openalex.org/I136199984","display_name":"Harvard University","ror":"https://ror.org/03vek6s52","country_code":"US","type":"education","lineage":["https://openalex.org/I136199984"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Xihong Lin","raw_affiliation_strings":["Harvard University, Boston, MA, USA"],"affiliations":[{"raw_affiliation_string":"Harvard University, Boston, MA, USA","institution_ids":["https://openalex.org/I136199984"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5030660111"],"corresponding_institution_ids":["https://openalex.org/I136199984"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.14612813,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"3","last_page":"4"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10261","display_name":"Genetic Associations and Epidemiology","score":0.9922000169754028,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10261","display_name":"Genetic Associations and Epidemiology","score":0.9922000169754028,"subfield":{"id":"https://openalex.org/subfields/1311","display_name":"Genetics"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9851999878883362,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10887","display_name":"Bioinformatics and Genomic Networks","score":0.9779000282287598,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6345210075378418},{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.6289277076721191},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.590951681137085},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5212567448616028},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.47552087903022766},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.3212815523147583},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2556030750274658},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.16396471858024597},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1314300298690796}],"concepts":[{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6345210075378418},{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.6289277076721191},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.590951681137085},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5212567448616028},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.47552087903022766},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.3212815523147583},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2556030750274658},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.16396471858024597},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1314300298690796},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3637528.3672194","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3637528.3672194","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":7,"referenced_works":["https://openalex.org/W2163953557","https://openalex.org/W2951393041","https://openalex.org/W3080999617","https://openalex.org/W4307838508","https://openalex.org/W4312181330","https://openalex.org/W4389207605","https://openalex.org/W4399641977"],"related_works":["https://openalex.org/W3179968364","https://openalex.org/W1999612375","https://openalex.org/W2389214306","https://openalex.org/W2938107654","https://openalex.org/W4235240664","https://openalex.org/W2151749779","https://openalex.org/W2965083567","https://openalex.org/W3008587939","https://openalex.org/W1838576100","https://openalex.org/W2095886385"],"abstract_inverted_index":{"The":[0],"data":[1,5,15,28,36,46,82,147],"science":[2,42,83,103,148],"ecosystem":[3,84],"encompasses":[4],"fairness,":[6],"statistical,":[7],"ML":[8],"and":[9,12,17,19,30,60,72,80,93,99,101,115,132,139,142,146],"AI":[10,25],"methods":[11],"tools,":[13],"interpretable":[14,81,140],"analysis":[16,87,125],"results,":[18],"trustworthy":[20,62],"decision-making.":[21,63],"Rapid":[22],"advancements":[23],"in":[24,55,75],"have":[26],"revolutionized":[27],"utilization":[29],"enabled":[31],"machines":[32],"to":[33],"learn":[34],"from":[35,45],"more":[37],"effectively.":[38],"Statistics,":[39],"as":[40,104],"the":[41,70,86,124],"of":[43,88,126],"learning":[44],"while":[47],"accounting":[48],"for":[49],"uncertainty,":[50],"plays":[51],"a":[52,136],"pivotal":[53],"role":[54],"addressing":[56],"complex":[57],"real-world":[58],"problems":[59],"facilitating":[61],"In":[64],"this":[65],"talk,":[66],"I":[67,118],"will":[68,119],"discuss":[69],"challenges":[71],"opportunities":[73],"involved":[74],"building":[76],"an":[77,105],"end-to-end":[78],"scalable":[79,138],"using":[85,123],"whole":[89,109,128],"genome":[90,110,129],"sequencing":[91,130],"studies":[92,131],"biobanks":[94,133],"that":[95],"integrates":[96],"statistics,":[97],"ML/AI,":[98],"genomic":[100],"health":[102,113],"example.":[106],"Biobanks":[107],"collect":[108],"data,":[111],"electronic":[112],"records":[114],"epidemiological":[116],"data.":[117],"illustrate":[120],"key":[121],"points":[122],"multi-ancestry":[127],"by":[134],"discussing":[135],"few":[137],"statistical":[141],"ML/AI":[143],"methods,":[144],"tools":[145],"resources.":[149]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
