{"id":"https://openalex.org/W4413979874","doi":"https://doi.org/10.14778/3749646.3749685","title":"Semantic Operators and Their Optimization: Enabling LLM-Based Data Processing with Accuracy Guarantees in LOTUS","display_name":"Semantic Operators and Their Optimization: Enabling LLM-Based Data Processing with Accuracy Guarantees in LOTUS","publication_year":2025,"publication_date":"2025-07-01","ids":{"openalex":"https://openalex.org/W4413979874","doi":"https://doi.org/10.14778/3749646.3749685"},"language":"en","primary_location":{"id":"doi:10.14778/3749646.3749685","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3749646.3749685","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111335362","display_name":"Liana Patel","orcid":null},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Liana Patel","raw_affiliation_strings":["Stanford University"],"affiliations":[{"raw_affiliation_string":"Stanford University","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Siddharth Jha","orcid":null},"institutions":[{"id":"https://openalex.org/I134446601","display_name":"Berkeley College","ror":"https://ror.org/02xewxa75","country_code":"US","type":"education","lineage":["https://openalex.org/I134446601"]},{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Siddharth Jha","raw_affiliation_strings":["UC Berkeley"],"affiliations":[{"raw_affiliation_string":"UC Berkeley","institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Melissa Pan","orcid":null},"institutions":[{"id":"https://openalex.org/I134446601","display_name":"Berkeley College","ror":"https://ror.org/02xewxa75","country_code":"US","type":"education","lineage":["https://openalex.org/I134446601"]},{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Melissa Pan","raw_affiliation_strings":["UC Berkeley"],"affiliations":[{"raw_affiliation_string":"UC Berkeley","institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101454782","display_name":"Harshit Gupta","orcid":"https://orcid.org/0000-0003-2850-0851"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Harshit Gupta","raw_affiliation_strings":["Stanford University"],"affiliations":[{"raw_affiliation_string":"Stanford University","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092622495","display_name":"Parth Asawa","orcid":null},"institutions":[{"id":"https://openalex.org/I134446601","display_name":"Berkeley College","ror":"https://ror.org/02xewxa75","country_code":"US","type":"education","lineage":["https://openalex.org/I134446601"]},{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Parth Asawa","raw_affiliation_strings":["UC Berkeley"],"affiliations":[{"raw_affiliation_string":"UC Berkeley","institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090739892","display_name":"Carlos Guestrin","orcid":"https://orcid.org/0000-0001-6348-5939"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Carlos Guestrin","raw_affiliation_strings":["Stanford University"],"affiliations":[{"raw_affiliation_string":"Stanford University","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005554337","display_name":"Matei Zaharia","orcid":"https://orcid.org/0000-0002-7547-7204"},"institutions":[{"id":"https://openalex.org/I134446601","display_name":"Berkeley College","ror":"https://ror.org/02xewxa75","country_code":"US","type":"education","lineage":["https://openalex.org/I134446601"]},{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Matei Zaharia","raw_affiliation_strings":["UC Berkeley"],"affiliations":[{"raw_affiliation_string":"UC Berkeley","institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5111335362"],"corresponding_institution_ids":["https://openalex.org/I97018004"],"apc_list":null,"apc_paid":null,"fwci":8.8623,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.97527833,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"18","issue":"11","first_page":"4171","last_page":"4184"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9919000267982483,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9919000267982483,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13734","display_name":"Advanced Computational Techniques and Applications","score":0.9916999936103821,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11063","display_name":"Rough Sets and Fuzzy Logic","score":0.9900000095367432,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/lotus","display_name":"Lotus","score":0.8082113862037659},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6590481996536255},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.32838788628578186}],"concepts":[{"id":"https://openalex.org/C2777635637","wikidata":"https://www.wikidata.org/wiki/Q3645698","display_name":"Lotus","level":2,"score":0.8082113862037659},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6590481996536255},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.32838788628578186},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.14778/3749646.3749685","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3749646.3749685","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W1792966054","https://openalex.org/W2164598857","https://openalex.org/W2185907055","https://openalex.org/W2750779823","https://openalex.org/W2752236330","https://openalex.org/W2762513422","https://openalex.org/W2786278116","https://openalex.org/W2963961878","https://openalex.org/W3000318171","https://openalex.org/W3015151524","https://openalex.org/W3021397474","https://openalex.org/W3027879771","https://openalex.org/W3082972076","https://openalex.org/W3104939451","https://openalex.org/W3185475042","https://openalex.org/W3198567598","https://openalex.org/W3200980294","https://openalex.org/W4287207977","https://openalex.org/W4288350551","https://openalex.org/W4301674784","https://openalex.org/W4378072589","https://openalex.org/W4383046915","https://openalex.org/W4383605243","https://openalex.org/W4385327559","https://openalex.org/W4385573057","https://openalex.org/W4387929127","https://openalex.org/W4388788698","https://openalex.org/W4389519187","https://openalex.org/W4389523765","https://openalex.org/W4389539730","https://openalex.org/W4390962868","https://openalex.org/W4391159183","https://openalex.org/W4392627821","https://openalex.org/W4399174383","https://openalex.org/W4400702599","https://openalex.org/W4401044046","https://openalex.org/W4402704461","https://openalex.org/W4403160910","https://openalex.org/W4403577657","https://openalex.org/W4403754277"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2366835325","https://openalex.org/W2074679237","https://openalex.org/W4402573539","https://openalex.org/W4238472094","https://openalex.org/W162164374","https://openalex.org/W2922035700","https://openalex.org/W2461260704"],"abstract_inverted_index":{"The":[0],"semantic":[1,45,138,152,180],"capabilities":[2],"of":[3,103,207,228],"large":[4],"language":[5,61,72],"models":[6],"(LLMs)":[7],"have":[8],"the":[9,48,91,94,100,155,179,226,242],"potential":[10],"to":[11,39,89,136,147,198,214,238],"enable":[12],"rich":[13],"analytics":[14],"and":[15,112,142,158,173,194],"reasoning":[16],"over":[17,93],"vast":[18],"knowledge":[19],"corpora.":[20],"Unfortunately,":[21],"existing":[22],"systems":[23,211],"either":[24],"empirically":[25],"optimize":[26],"expensive":[27],"LLM-powered":[28],"operations":[29,58,144],"with":[30,51,59,106],"no":[31],"performance":[32],"guarantees":[33,54,124],",":[34,47,83,111],"or":[35,67,204,224],"limit":[36],"their":[37],"support":[38],"simple":[40],"batched-inference":[41],"primitives.":[42],"We":[43,150,176],"introduce":[44],"operators":[46,153],"first":[49],"formalism":[50],"statistical":[52],"accuracy":[53,123,218,227],"for":[55,125,232],"general-purpose":[56],"AI-based":[57],"natural":[60,71],"parameters":[62],"(e.g.,":[63],"filtering,":[64,139],"sorting,":[65],"joining":[66],"aggregating":[68],"records":[69],"using":[70],"criteria).":[73],"Each":[74],"operator":[75,105,181,192],"can":[76],"be":[77],"implemented":[78],"by":[79,145,212],"multiple":[80],"AI":[81,187,230],"algorithms":[82],"which":[84],"compose":[85],"individual":[86,126],"model":[87,92,98,182],"invocations":[88],"orchestrate":[90],"data.":[95],"Our":[96],"programming":[97],"specifies":[99],"expected":[101],"behavior":[102],"each":[104,233],"a":[107,190],"high-quality":[108],"reference":[109],"algorithm":[110],"we":[113,131],"develop":[114],"an":[115],"optimization":[116],"framework":[117],"that":[118,178,202],"reduces":[119],"cost,":[120],"while":[121,216,235],"providing":[122],"operators.":[127],"Using":[128],"this":[129],"approach,":[130],"propose":[132],"several":[133],"novel":[134],"optimizations":[135],"accelerate":[137],"joining,":[140],"group-by":[141],"top-k":[143],"up":[146,213,237],"1,":[148],"000\u00d7.":[149],"implement":[151],"in":[154,189],"LOTUS":[156,221,245],"system":[157],"demonstrate":[159],"LOTUS'":[160],"effectiveness":[161],"on":[162],"real,":[163],"bulk-semantic":[164],"processing":[165],"applications,":[166],"including":[167],"fact-checking,":[168],"biomedical":[169],"multi-label":[170],"classification,":[171],"search,":[172],"topic":[174],"analysis.":[175],"show":[177],"is":[183,246],"expressive,":[184],"capturing":[185],"state-of-the-art":[186,229],"pipelines":[188,201,231],"few":[191],"calls,":[193],"making":[195],"it":[196],"easy":[197],"express":[199],"new":[200],"match":[203,223],"exceed":[205,225],"quality":[206],"recent":[208],"LLM-based":[209],"analytic":[210],"170%,":[215],"offering":[217],"guarantees.":[219],"Overall,":[220],"programs":[222],"task":[234],"running":[236],"3.6\u00d7":[239],"faster":[240],"than":[241],"highest-quality":[243],"baselines.":[244],"publicly":[247],"available":[248],"at":[249],"https://github.com/lotus-data/lotus.":[250]},"counts_by_year":[{"year":2026,"cited_by_count":4}],"updated_date":"2026-04-22T08:38:42.863108","created_date":"2025-10-10T00:00:00"}
