{"id":"https://openalex.org/W2964832265","doi":"https://doi.org/10.1145/3292500.3340407","title":"Addressing Challenges in Data Science","display_name":"Addressing Challenges in Data Science","publication_year":2019,"publication_date":"2019-07-25","ids":{"openalex":"https://openalex.org/W2964832265","doi":"https://doi.org/10.1145/3292500.3340407","mag":"2964832265"},"language":"en","primary_location":{"id":"doi:10.1145/3292500.3340407","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3292500.3340407","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery &amp; Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5005512393","display_name":"Joseph K. Bradley","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Joseph Bradley","raw_affiliation_strings":["Databricks, Inc., San Francisco, CA, USA"],"affiliations":[{"raw_affiliation_string":"Databricks, Inc., San Francisco, CA, USA","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5005512393"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.10496918,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"3163","last_page":"3163"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9854999780654907,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9854999780654907,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9726999998092651,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11891","display_name":"Big Data and Business Intelligence","score":0.9710999727249146,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spark","display_name":"SPARK (programming language)","score":0.8066679239273071},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.7986618280410767},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6559137105941772},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.6440589427947998},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.5911632776260376},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.5366133451461792},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.4931632876396179},{"id":"https://openalex.org/keywords/open-data","display_name":"Open data","score":0.4688972532749176},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.46847003698349},{"id":"https://openalex.org/keywords/open-source","display_name":"Open source","score":0.41246843338012695},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.3087059259414673},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.2356899380683899},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.10441654920578003},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.09820953011512756}],"concepts":[{"id":"https://openalex.org/C2781215313","wikidata":"https://www.wikidata.org/wiki/Q3493345","display_name":"SPARK (programming language)","level":2,"score":0.8066679239273071},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.7986618280410767},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6559137105941772},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.6440589427947998},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.5911632776260376},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.5366133451461792},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.4931632876396179},{"id":"https://openalex.org/C2780535194","wikidata":"https://www.wikidata.org/wiki/Q309901","display_name":"Open data","level":2,"score":0.4688972532749176},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.46847003698349},{"id":"https://openalex.org/C3018397939","wikidata":"https://www.wikidata.org/wiki/Q3644502","display_name":"Open source","level":3,"score":0.41246843338012695},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3087059259414673},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.2356899380683899},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.10441654920578003},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.09820953011512756},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3292500.3340407","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3292500.3340407","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 25th ACM SIGKDD International Conference on Knowledge Discovery &amp; Data Mining","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.550000011920929}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4390608645","https://openalex.org/W4394895745","https://openalex.org/W2960264696","https://openalex.org/W2766461310","https://openalex.org/W4247566972","https://openalex.org/W3083262785","https://openalex.org/W4388692845","https://openalex.org/W3202731209","https://openalex.org/W3211874991","https://openalex.org/W4386133355"],"abstract_inverted_index":{"Data":[0],"science":[1],"in":[2],"modern":[3],"applications":[4,36],"is":[5],"pushing":[6],"the":[7,17,24],"limits":[8],"of":[9,15,19,26],"tools":[10],"and":[11,23,37,48,62],"organizations.":[12],"The":[13],"scale":[14],"data,":[16],"breadth":[18],"required":[20],"skill":[21],"sets,":[22],"complexity":[25],"workflows":[27],"all":[28],"cause":[29],"organizations":[30],"to":[31,40,51],"stumble":[32],"when":[33],"developing":[34],"data-powered":[35],"moving":[38],"them":[39,53],"production.":[41],"This":[42],"talk":[43],"will":[44],"discuss":[45],"these":[46],"challenges":[47],"Databricks'":[49],"efforts":[50],"overcome":[52],"within":[54],"open":[55],"source":[56],"software":[57],"projects":[58],"like":[59],"Apache":[60],"Spark":[61],"MLflow.":[63]},"counts_by_year":[{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
