{"id":"https://openalex.org/W7161700106","doi":"https://doi.org/10.48550/arxiv.2605.17831","title":"Agentic Cost-Aware Query Planning with Knowledge Distillation for Big Data Analytics","display_name":"Agentic Cost-Aware Query Planning with Knowledge Distillation for Big Data Analytics","publication_year":2026,"publication_date":"2026-05-18","ids":{"openalex":"https://openalex.org/W7161700106","doi":"https://doi.org/10.48550/arxiv.2605.17831"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.17831","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.17831","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.17831","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103426619","display_name":"Mahdi Naser-Moghadasi","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Naser-Moghadasi, Mahdi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":["https://openalex.org/A5103426619"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.39410001039505005,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.39410001039505005,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.1867000013589859,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.04479999840259552,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/planner","display_name":"Planner","score":0.8356999754905701},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.6305999755859375},{"id":"https://openalex.org/keywords/analytics","display_name":"Analytics","score":0.5727999806404114},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5281000137329102},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.47769999504089355},{"id":"https://openalex.org/keywords/query-optimization","display_name":"Query optimization","score":0.4629000127315521},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.40070000290870667},{"id":"https://openalex.org/keywords/sql","display_name":"SQL","score":0.37950000166893005},{"id":"https://openalex.org/keywords/query-plan","display_name":"Query plan","score":0.3700999915599823}],"concepts":[{"id":"https://openalex.org/C2776999362","wikidata":"https://www.wikidata.org/wiki/Q2349274","display_name":"Planner","level":2,"score":0.8356999754905701},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7990999817848206},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.6305999755859375},{"id":"https://openalex.org/C79158427","wikidata":"https://www.wikidata.org/wiki/Q485396","display_name":"Analytics","level":2,"score":0.5727999806404114},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5281000137329102},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.47769999504089355},{"id":"https://openalex.org/C157692150","wikidata":"https://www.wikidata.org/wiki/Q2919848","display_name":"Query optimization","level":2,"score":0.4629000127315521},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.42890000343322754},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.40070000290870667},{"id":"https://openalex.org/C510870499","wikidata":"https://www.wikidata.org/wiki/Q47607","display_name":"SQL","level":2,"score":0.37950000166893005},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3725000023841858},{"id":"https://openalex.org/C2779729312","wikidata":"https://www.wikidata.org/wiki/Q784232","display_name":"Query plan","level":5,"score":0.3700999915599823},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.35420000553131104},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3531000018119812},{"id":"https://openalex.org/C162262903","wikidata":"https://www.wikidata.org/wiki/Q343527","display_name":"Allocator","level":2,"score":0.3450999855995178},{"id":"https://openalex.org/C2776505523","wikidata":"https://www.wikidata.org/wiki/Q4785468","display_name":"Plan (archaeology)","level":2,"score":0.3409000039100647},{"id":"https://openalex.org/C175801342","wikidata":"https://www.wikidata.org/wiki/Q1988917","display_name":"Data analysis","level":2,"score":0.33559998869895935},{"id":"https://openalex.org/C129916263","wikidata":"https://www.wikidata.org/wiki/Q1141183","display_name":"Backward chaining","level":4,"score":0.32429999113082886},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.31769999861717224},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.305400013923645},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.29429998993873596},{"id":"https://openalex.org/C8505890","wikidata":"https://www.wikidata.org/wiki/Q605095","display_name":"Budget constraint","level":2,"score":0.2928999960422516},{"id":"https://openalex.org/C114073186","wikidata":"https://www.wikidata.org/wiki/Q2631895","display_name":"Automated planning and scheduling","level":2,"score":0.29190000891685486},{"id":"https://openalex.org/C192028432","wikidata":"https://www.wikidata.org/wiki/Q845739","display_name":"Query language","level":2,"score":0.27889999747276306},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2646999955177307},{"id":"https://openalex.org/C120567893","wikidata":"https://www.wikidata.org/wiki/Q1582085","display_name":"Knowledge extraction","level":2,"score":0.2587999999523163},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.2572999894618988},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.2563999891281128}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.17831","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.17831","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.17831","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.17831","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Query":[0],"optimization":[1,58],"in":[2,130],"big":[3],"data":[4],"analytics":[5,145],"remains":[6],"computationally":[7],"expensive,":[8],"particularly":[9],"for":[10,101],"resource-constrained":[11],"environments":[12],"where":[13],"traditional":[14],"optimizers":[15],"fail":[16],"to":[17,44,97,116],"satisfy":[18],"memory":[19],"and":[20,41,108,149],"latency":[21,80,113],"constraints.":[22,72],"We":[23],"present":[24],"an":[25],"agentic":[26],"query":[27,79],"planning":[28],"system":[29],"that":[30],"combines":[31],"a":[32,45],"rule-based":[33],"teacher":[34,50],"planner,":[35],"UCB1":[36,61],"bandit":[37,62],"exploration,":[38],"cost-aware":[39,85],"prediction,":[40],"knowledge":[42],"distillation":[43],"lightweight":[46],"student":[47,89,125],"planner.":[48],"Our":[49,139],"planner":[51,90,126],"generates":[52],"SQL":[53],"plans":[54,133],"using":[55],"six":[56],"key":[57],"strategies,":[59],"while":[60,119],"search":[63],"efficiently":[64],"explores":[65],"the":[66],"plan":[67,82],"space":[68],"under":[69],"explicit":[70],"resource":[71],"A":[73,87],"Random":[74],"Forest":[75],"cost":[76],"model":[77],"predicts":[78],"from":[81],"features,":[83],"enabling":[84],"decisions.":[86],"distilled":[88],"(Logistic":[91],"Regression":[92],"or":[93],"Gradient":[94],"Boosting)":[95],"learns":[96],"mimic":[98],"teacher-bandit":[99],"decisions":[100],"fast":[102],"inference.":[103],"Evaluation":[104],"on":[105,146],"NYC":[106],"Taxi":[107],"IMDB":[109],"datasets":[110],"demonstrates":[111],"23%":[112],"reduction":[114],"compared":[115],"default":[117],"planners":[118],"maintaining":[120],"94%":[121],"constraint":[122],"satisfaction.":[123],"The":[124],"achieves":[127],"89%":[128],"accuracy":[129],"replicating":[131],"optimal":[132],"with":[134],"15x":[135],"faster":[136],"inference":[137],"time.":[138],"single-file":[140],"implementation":[141],"enables":[142],"reproducible":[143],"big-data":[144],"resource-limited":[147],"machines":[148],"is":[150],"publicly":[151],"available":[152],"at":[153],"https://github.com/mahdinaser/agentic-kd-planner.":[154]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-05-20T00:00:00"}
