{"id":"https://openalex.org/W4413978072","doi":"https://doi.org/10.14778/3749646.3749713","title":"QUEST: Query Optimization in Unstructured Document Analysis","display_name":"QUEST: Query Optimization in Unstructured Document Analysis","publication_year":2025,"publication_date":"2025-07-01","ids":{"openalex":"https://openalex.org/W4413978072","doi":"https://doi.org/10.14778/3749646.3749713"},"language":"en","primary_location":{"id":"doi:10.14778/3749646.3749713","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3749646.3749713","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5113342689","display_name":"Zhaoze Sun","orcid":null},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhaoze Sun","raw_affiliation_strings":["Beijing Institute of Technology, China"],"affiliations":[{"raw_affiliation_string":"Beijing Institute of Technology, China","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101797040","display_name":"Chengliang Chai","orcid":"https://orcid.org/0000-0001-8080-5594"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chengliang Chai","raw_affiliation_strings":["Beijing Institute of Technology, China"],"affiliations":[{"raw_affiliation_string":"Beijing Institute of Technology, China","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5098770356","display_name":"Qiyan Deng","orcid":"https://orcid.org/0009-0002-7797-5888"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qiyan Deng","raw_affiliation_strings":["Beijing Institute of Technology, China"],"affiliations":[{"raw_affiliation_string":"Beijing Institute of Technology, China","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113881881","display_name":"Kaisen Jin","orcid":"https://orcid.org/0009-0004-7020-5404"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kaisen Jin","raw_affiliation_strings":["Beijing Institute of Technology, China"],"affiliations":[{"raw_affiliation_string":"Beijing Institute of Technology, China","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017660782","display_name":"Xinyu Guo","orcid":"https://orcid.org/0000-0002-7282-7379"},"institutions":[{"id":"https://openalex.org/I138006243","display_name":"University of Arizona","ror":"https://ror.org/03m2x1q45","country_code":"US","type":"education","lineage":["https://openalex.org/I138006243"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xinyu Guo","raw_affiliation_strings":["University of Arizona, United States"],"affiliations":[{"raw_affiliation_string":"University of Arizona, United States","institution_ids":["https://openalex.org/I138006243"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104160462","display_name":"Han Han","orcid":null},"institutions":[{"id":"https://openalex.org/I138006243","display_name":"University of Arizona","ror":"https://ror.org/03m2x1q45","country_code":"US","type":"education","lineage":["https://openalex.org/I138006243"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Han Han","raw_affiliation_strings":["University of Arizona, United States"],"affiliations":[{"raw_affiliation_string":"University of Arizona, United States","institution_ids":["https://openalex.org/I138006243"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030900096","display_name":"Ye Yuan","orcid":"https://orcid.org/0000-0002-6282-6057"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ye Yuan","raw_affiliation_strings":["Beijing Institute of Technology, China"],"affiliations":[{"raw_affiliation_string":"Beijing Institute of Technology, China","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103052244","display_name":"Guoren Wang","orcid":"https://orcid.org/0000-0002-8411-2127"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guoren Wang","raw_affiliation_strings":["Beijing Institute of Technology, China"],"affiliations":[{"raw_affiliation_string":"Beijing Institute of Technology, China","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049926126","display_name":"Lei Cao","orcid":"https://orcid.org/0000-0001-9909-8607"},"institutions":[{"id":"https://openalex.org/I138006243","display_name":"University of Arizona","ror":"https://ror.org/03m2x1q45","country_code":"US","type":"education","lineage":["https://openalex.org/I138006243"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lei Cao","raw_affiliation_strings":["University of Arizona, United States"],"affiliations":[{"raw_affiliation_string":"University of Arizona, United States","institution_ids":["https://openalex.org/I138006243"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5113342689"],"corresponding_institution_ids":["https://openalex.org/I125839683"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.13610183,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"18","issue":"11","first_page":"4560","last_page":"4573"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6821694374084473},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5914605855941772},{"id":"https://openalex.org/keywords/query-optimization","display_name":"Query optimization","score":0.5408300757408142},{"id":"https://openalex.org/keywords/unstructured-data","display_name":"Unstructured data","score":0.42415598034858704},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.27861297130584717},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.09593448042869568}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6821694374084473},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5914605855941772},{"id":"https://openalex.org/C157692150","wikidata":"https://www.wikidata.org/wiki/Q2919848","display_name":"Query optimization","level":2,"score":0.5408300757408142},{"id":"https://openalex.org/C2781252014","wikidata":"https://www.wikidata.org/wiki/Q1141900","display_name":"Unstructured data","level":3,"score":0.42415598034858704},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.27861297130584717},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.09593448042869568}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.14778/3749646.3749713","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3749646.3749713","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W1991271936","https://openalex.org/W2069065514","https://openalex.org/W2077815765","https://openalex.org/W2161861392","https://openalex.org/W2606555609","https://openalex.org/W2899575547","https://openalex.org/W2914304175","https://openalex.org/W2963469388","https://openalex.org/W3122890974","https://openalex.org/W3148437589","https://openalex.org/W3198523333","https://openalex.org/W4245848455","https://openalex.org/W4251248674","https://openalex.org/W4284670538","https://openalex.org/W4310923309","https://openalex.org/W4389523900","https://openalex.org/W4389539730","https://openalex.org/W4399174722","https://openalex.org/W4400909484","https://openalex.org/W4401856724","https://openalex.org/W4401857375","https://openalex.org/W4404782542","https://openalex.org/W4411779666"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W3203889067","https://openalex.org/W3184725726","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W2378793138","https://openalex.org/W4396701345"],"abstract_inverted_index":{"Most":[0],"recently,":[1],"researchers":[2],"have":[3],"started":[4],"building":[5],"large":[6],"language":[7],"models":[8],"(LLMs)":[9],"powered":[10],"data":[11],"systems":[12,60],"that":[13,206],"allow":[14],"users":[15],"to":[16,50,71,109,127,135,144,185],"analyze":[17],"unstructured":[18,100],"text":[19,124],"documents":[20],"like":[21],"working":[22],"with":[23,235],"a":[24,93,183],"database":[25],"because":[26,160],"LLMs":[27],"are":[28,78],"very":[29],"effective":[30],"in":[31,68,80],"extracting":[32],"attributes":[33,130],"from":[34],"documents.":[35,177],"In":[36],"such":[37],"systems,":[38],"LLM-based":[39],"extraction":[40,115,163],"operations":[41],"constitute":[42],"the":[43,51,63,111,123,128,146,161,187,218,228],"performance":[44],"bottleneck":[45],"of":[46,95,113,148,189,220],"query":[47,64,73,157],"execution":[48,74,158,204],"due":[49],"high":[52],"monetary":[53],"cost":[54,112,164,224],"and":[55,131,200],"slow":[56],"LLM":[57,82,195],"inference.":[58],"Existing":[59],"typically":[61],"borrow":[62],"optimization":[65,97],"principles":[66],"popular":[67],"relational":[69],"databases":[70],"produce":[72],"plans,":[75],"which":[76,91],"unfortunately":[77],"ineffective":[79],"minimizing":[81],"cost.":[83],"To":[84],"fill":[85],"this":[86,118],"gap,":[87],"we":[88,104,138,153],"propose":[89],"QUEST,":[90,221],"features":[92],"bunch":[94],"novel":[96],"strategies":[98,199],"for":[99,175],"document":[101,168],"analysis.":[102],"First,":[103],"introduce":[105],"an":[106,140,155,201],"index-based":[107],"strategy":[108,143],"minimize":[110,186],"each":[114,179],"operation.":[116],"With":[117],"index,":[119],"QUEST":[120,171,181],"quickly":[121],"retrieves":[122],"segments":[125],"relevant":[126,150],"target":[129],"only":[132],"feeds":[133],"them":[134],"LLMs.":[136],"Furthermore,":[137],"design":[139],"evidence-augmented":[141],"retrieval":[142],"reduce":[145],"possibility":[147],"missing":[149],"segments.":[151],"Moreover,":[152],"develop":[154],"instance-optimized":[156],"strategy:":[159],"attribute":[162,190],"could":[165],"vary":[166],"significantly":[167],"by":[169,231],"document,":[170,180],"produces":[172,182],"different":[173,176],"plans":[174],"For":[178],"plan":[184],"frequency":[188],"extraction.":[191],"The":[192],"innovations":[193],"include":[194],"cost-aware":[196],"operator":[197],"ordering":[198],"optimized":[202],"join":[203],"approach":[205],"transforms":[207],"joins":[208],"into":[209],"filters.":[210],"Extensive":[211],"experiments":[212],"on":[213],"3":[214],"real-world":[215],"datasets":[216],"demonstrate":[217],"superiority":[219],"achieving":[222],"30%-6\u00d7":[223],"savings":[225],"while":[226],"improving":[227],"F1":[229],"score":[230],"10%":[232],"-27%":[233],"compared":[234],"state-of-the-art":[236],"baselines.":[237]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
