{"id":"https://openalex.org/W3168288096","doi":"https://doi.org/10.1145/3448016.3457546","title":"Efficient Deep Learning Pipelines for Accurate Cost Estimations Over Large Scale Query Workload","display_name":"Efficient Deep Learning Pipelines for Accurate Cost Estimations Over Large Scale Query Workload","publication_year":2021,"publication_date":"2021-06-09","ids":{"openalex":"https://openalex.org/W3168288096","doi":"https://doi.org/10.1145/3448016.3457546","mag":"3168288096"},"language":"en","primary_location":{"id":"doi:10.1145/3448016.3457546","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3448016.3457546","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3448016.3457546","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2021 International Conference on Management of Data","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3448016.3457546","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5039002882","display_name":"Johan Kok Zhi Kang","orcid":"https://orcid.org/0000-0003-1082-1008"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Johan Kok Zhi Kang","raw_affiliation_strings":["National University of Singapore, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore, Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107869768","display_name":"Gaurav","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gaurav","raw_affiliation_strings":["GrabTaxi Holdings, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"GrabTaxi Holdings, Singapore, Singapore","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021288816","display_name":"Sien Yi Tan","orcid":"https://orcid.org/0009-0008-9732-6758"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sien Yi Tan","raw_affiliation_strings":["GrabTaxi Holdings, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"GrabTaxi Holdings, Singapore, Singapore","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107387385","display_name":"Feng Cheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Feng Cheng","raw_affiliation_strings":["GrabTaxi Holdings, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"GrabTaxi Holdings, Singapore, Singapore","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043164368","display_name":"Shixuan Sun","orcid":"https://orcid.org/0000-0003-4060-9438"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Shixuan Sun","raw_affiliation_strings":["National University of Singapore, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore, Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039946576","display_name":"Bingsheng He","orcid":"https://orcid.org/0000-0001-8618-4581"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Bingsheng He","raw_affiliation_strings":["National University of Singapore, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore, Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5039002882"],"corresponding_institution_ids":["https://openalex.org/I165932596"],"apc_list":null,"apc_paid":null,"fwci":3.2091,"has_fulltext":true,"cited_by_count":26,"citation_normalized_percentile":{"value":0.91971769,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1014","last_page":"1022"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9919999837875366,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.98580002784729,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8484764099121094},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6210490465164185},{"id":"https://openalex.org/keywords/memory-footprint","display_name":"Memory footprint","score":0.5675384998321533},{"id":"https://openalex.org/keywords/workload","display_name":"Workload","score":0.4919898211956024},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.4850849211215973},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4486062526702881},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4306086301803589},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.42818111181259155},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.42340680956840515},{"id":"https://openalex.org/keywords/query-optimization","display_name":"Query optimization","score":0.41337448358535767},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3526771366596222},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.35068434476852417},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.3424091935157776},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.11848664283752441}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8484764099121094},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6210490465164185},{"id":"https://openalex.org/C74912251","wikidata":"https://www.wikidata.org/wiki/Q6815727","display_name":"Memory footprint","level":2,"score":0.5675384998321533},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.4919898211956024},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.4850849211215973},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4486062526702881},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4306086301803589},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.42818111181259155},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.42340680956840515},{"id":"https://openalex.org/C157692150","wikidata":"https://www.wikidata.org/wiki/Q2919848","display_name":"Query optimization","level":2,"score":0.41337448358535767},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3526771366596222},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35068434476852417},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3424091935157776},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.11848664283752441},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3448016.3457546","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3448016.3457546","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3448016.3457546","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2021 International Conference on Management of Data","raw_type":"proceedings-article"},{"id":"pmh:oai:scholarbank.nus.edu.sg:10635/215369","is_oa":false,"landing_page_url":"https://scholarbank.nus.edu.sg/handle/10635/215369","pdf_url":null,"source":{"id":"https://openalex.org/S7407052290","display_name":"National University of Singapore","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Elements","raw_type":"Article"}],"best_oa_location":{"id":"doi:10.1145/3448016.3457546","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3448016.3457546","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3448016.3457546","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2021 International Conference on Management of Data","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Decent work and economic growth","id":"https://metadata.un.org/sdg/8","score":0.46000000834465027}],"awards":[],"funders":[{"id":"https://openalex.org/F4320320698","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3168288096.pdf","grobid_xml":"https://content.openalex.org/works/W3168288096.grobid-xml"},"referenced_works_count":39,"referenced_works":["https://openalex.org/W117630655","https://openalex.org/W1678889691","https://openalex.org/W2014294093","https://openalex.org/W2038412523","https://openalex.org/W2079967994","https://openalex.org/W2081728040","https://openalex.org/W2100773341","https://openalex.org/W2109907545","https://openalex.org/W2150304332","https://openalex.org/W2405294958","https://openalex.org/W2521550930","https://openalex.org/W2523060838","https://openalex.org/W2612779005","https://openalex.org/W2747329762","https://openalex.org/W2767541512","https://openalex.org/W2794514881","https://openalex.org/W2890276152","https://openalex.org/W2893787142","https://openalex.org/W2906910993","https://openalex.org/W2911581663","https://openalex.org/W2921355642","https://openalex.org/W2946026089","https://openalex.org/W2949762319","https://openalex.org/W2950577311","https://openalex.org/W2952433032","https://openalex.org/W2953384591","https://openalex.org/W2961839838","https://openalex.org/W2963371736","https://openalex.org/W2963853546","https://openalex.org/W2970148517","https://openalex.org/W3031390801","https://openalex.org/W3085960807","https://openalex.org/W3099273181","https://openalex.org/W3100077023","https://openalex.org/W4232868312","https://openalex.org/W4235670907","https://openalex.org/W4244347206","https://openalex.org/W4301409532","https://openalex.org/W6744580074"],"related_works":["https://openalex.org/W986318368","https://openalex.org/W2000785801","https://openalex.org/W2384410913","https://openalex.org/W2352878646","https://openalex.org/W4390608645","https://openalex.org/W2004734601","https://openalex.org/W2130149817","https://openalex.org/W2990194547","https://openalex.org/W1480123525","https://openalex.org/W2620865396"],"abstract_inverted_index":{"The":[0],"use":[1],"of":[2,12,21,41,47,98,120,124,167],"deep":[3],"learning":[4],"models":[5,25],"for":[6,71,145,171],"forecasting":[7],"the":[8,69],"resource":[9,95,143],"consumption":[10,96],"patterns":[11,97],"SQL":[13],"queries":[14,48],"have":[15,26],"recently":[16],"been":[17],"a":[18,85,103,117],"popular":[19],"area":[20],"study.":[22],"While":[23],"these":[24],"demonstrated":[27],"promising":[28],"accuracy,":[29,138],"training":[30,65,158,175],"them":[31],"over":[32,44,111,176],"large":[33,45,172],"scale":[34],"industry":[35],"workloads":[36],"are":[37],"expensive.":[38],"Space":[39],"inefficiencies":[40],"encoding":[42],"techniques":[43],"numbers":[46],"and":[49,67,156],"excessive":[50],"padding":[51],"used":[52],"to":[53,76,140,169],"enforce":[54],"shape":[55],"consistency":[56],"across":[57],"diverse":[58],"query":[59,99],"plans":[60],"implies":[61],"1)":[62],"longer":[63],"model":[64,174],"time":[66,159],"2)":[68],"need":[70],"expensive,":[72],"scaled":[73],"up":[74,168],"infrastructure":[75],"support":[77],"batched":[78,173],"training.":[79],"In":[80],"turn,":[81],"we":[82],"developed":[83],"Prestroid,":[84],"tree":[86],"convolution":[87],"based":[88],"data":[89,118,125],"science":[90],"pipeline":[91,110,133],"that":[92,131],"accurately":[93],"predicts":[94],"traces,":[100],"but":[101],"at":[102],"much":[104],"lower":[105],"cost.":[106],"We":[107,162],"evaluated":[108],"our":[109,132],"19K":[112],"Presto":[113],"OLAP":[114],"queries,":[115],"on":[116,136],"lake":[119],"more":[121,141],"than":[122],"20PB":[123],"from":[126],"Grab.":[127],"Experimental":[128],"results":[129],"imply":[130],"outperforms":[134],"benchmarks":[135],"predictive":[137],"contributing":[139],"precise":[142],"prediction":[144],"large-scale":[146],"workloads,":[147],"yet":[148],"also":[149],"reduces":[150],"per-batch":[151],"memory":[152],"footprint":[153],"by":[154,160],"13.5x":[155],"per-epoch":[157],"3.45x.":[161],"demonstrate":[163],"direct":[164],"cost":[165],"savings":[166],"13.2x":[170],"Microsoft":[177],"Azure":[178],"VMs.":[179]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":9},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2021-06-22T00:00:00"}
