{"id":"https://openalex.org/W4406192799","doi":"https://doi.org/10.14778/3705829.3705843","title":"LEAP: LLM-Powered End-to-End Automatic Library for Processing Social Science Queries on Unstructured Data","display_name":"LEAP: LLM-Powered End-to-End Automatic Library for Processing Social Science Queries on Unstructured Data","publication_year":2024,"publication_date":"2024-10-01","ids":{"openalex":"https://openalex.org/W4406192799","doi":"https://doi.org/10.14778/3705829.3705843"},"language":"en","primary_location":{"id":"doi:10.14778/3705829.3705843","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3705829.3705843","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2501.03892","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5060192796","display_name":"Chuxuan Hu","orcid":"https://orcid.org/0009-0001-3746-2722"},"institutions":[{"id":"https://openalex.org/I183874917","display_name":"Urbana University","ror":"https://ror.org/04kp3hw27","country_code":"US","type":"education","lineage":["https://openalex.org/I183874917"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Chuxuan Hu","raw_affiliation_strings":["UIUC, Urbana, IL"],"affiliations":[{"raw_affiliation_string":"UIUC, Urbana, IL","institution_ids":["https://openalex.org/I183874917"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048021375","display_name":"Austin Peters","orcid":"https://orcid.org/0000-0003-0200-1042"},"institutions":[{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Austin Peters","raw_affiliation_strings":["University of Chicago, Chicago, IL"],"affiliations":[{"raw_affiliation_string":"University of Chicago, Chicago, IL","institution_ids":["https://openalex.org/I40347166"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5072348548","display_name":"Daniel Kang","orcid":"https://orcid.org/0000-0001-9860-9938"},"institutions":[{"id":"https://openalex.org/I183874917","display_name":"Urbana University","ror":"https://ror.org/04kp3hw27","country_code":"US","type":"education","lineage":["https://openalex.org/I183874917"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Daniel Kang","raw_affiliation_strings":["UIUC, Urbana, IL"],"affiliations":[{"raw_affiliation_string":"UIUC, Urbana, IL","institution_ids":["https://openalex.org/I183874917"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5060192796"],"corresponding_institution_ids":["https://openalex.org/I183874917"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.2308389,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"18","issue":"2","first_page":"253","last_page":"264"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9911999702453613,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9911999702453613,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.991100013256073,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9811000227928162,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/end-to-end-principle","display_name":"End-to-end principle","score":0.7747979164123535},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7493484020233154},{"id":"https://openalex.org/keywords/unstructured-data","display_name":"Unstructured data","score":0.7311016917228699},{"id":"https://openalex.org/keywords/end-user","display_name":"End user","score":0.633315920829773},{"id":"https://openalex.org/keywords/end-of-history","display_name":"End of history","score":0.4920641779899597},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.46428146958351135},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.3815334439277649},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.33116620779037476},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.2062256932258606},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.19712534546852112},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.15002384781837463}],"concepts":[{"id":"https://openalex.org/C74296488","wikidata":"https://www.wikidata.org/wiki/Q2527392","display_name":"End-to-end principle","level":2,"score":0.7747979164123535},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7493484020233154},{"id":"https://openalex.org/C2781252014","wikidata":"https://www.wikidata.org/wiki/Q1141900","display_name":"Unstructured data","level":3,"score":0.7311016917228699},{"id":"https://openalex.org/C91262260","wikidata":"https://www.wikidata.org/wiki/Q528074","display_name":"End user","level":2,"score":0.633315920829773},{"id":"https://openalex.org/C2778935963","wikidata":"https://www.wikidata.org/wiki/Q13218530","display_name":"End of history","level":3,"score":0.4920641779899597},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.46428146958351135},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3815334439277649},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.33116620779037476},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.2062256932258606},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.19712534546852112},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.15002384781837463},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.14778/3705829.3705843","is_oa":false,"landing_page_url":"https://doi.org/10.14778/3705829.3705843","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2501.03892","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2501.03892","pdf_url":"https://arxiv.org/pdf/2501.03892","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2501.03892","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2501.03892","pdf_url":"https://arxiv.org/pdf/2501.03892","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4406192799.pdf","grobid_xml":"https://content.openalex.org/works/W4406192799.grobid-xml"},"referenced_works_count":84,"referenced_works":["https://openalex.org/W4277775","https://openalex.org/W972213760","https://openalex.org/W1495512916","https://openalex.org/W1536741282","https://openalex.org/W1736702067","https://openalex.org/W1968380849","https://openalex.org/W2096619464","https://openalex.org/W2142189376","https://openalex.org/W2156884493","https://openalex.org/W2159397589","https://openalex.org/W2163833659","https://openalex.org/W2171468534","https://openalex.org/W2250773804","https://openalex.org/W2251738400","https://openalex.org/W2460159515","https://openalex.org/W2612228435","https://openalex.org/W2740751204","https://openalex.org/W2752236330","https://openalex.org/W2760124296","https://openalex.org/W2769358515","https://openalex.org/W2791544114","https://openalex.org/W2802126503","https://openalex.org/W2890431379","https://openalex.org/W2890727862","https://openalex.org/W2891575196","https://openalex.org/W2923446382","https://openalex.org/W2945102109","https://openalex.org/W2946358633","https://openalex.org/W2949340035","https://openalex.org/W2949678053","https://openalex.org/W2956105246","https://openalex.org/W2962713807","https://openalex.org/W2963323070","https://openalex.org/W2963955897","https://openalex.org/W2970252517","https://openalex.org/W2970395295","https://openalex.org/W3022499311","https://openalex.org/W3034514149","https://openalex.org/W3034835156","https://openalex.org/W3035172316","https://openalex.org/W3042379185","https://openalex.org/W3048307099","https://openalex.org/W3081158114","https://openalex.org/W3085011441","https://openalex.org/W3093603380","https://openalex.org/W3093819145","https://openalex.org/W3094450620","https://openalex.org/W3101112860","https://openalex.org/W3101295217","https://openalex.org/W3102577836","https://openalex.org/W3103667349","https://openalex.org/W3104982372","https://openalex.org/W3105463319","https://openalex.org/W3106460864","https://openalex.org/W3134427152","https://openalex.org/W3169088712","https://openalex.org/W3170666909","https://openalex.org/W3170721718","https://openalex.org/W3170759063","https://openalex.org/W3173453598","https://openalex.org/W3176646110","https://openalex.org/W3183955733","https://openalex.org/W3185432972","https://openalex.org/W3205301323","https://openalex.org/W3209972423","https://openalex.org/W4226353085","https://openalex.org/W4239960582","https://openalex.org/W4248081654","https://openalex.org/W4281739679","https://openalex.org/W4285105218","https://openalex.org/W4300827275","https://openalex.org/W4308146474","https://openalex.org/W4317716303","https://openalex.org/W4366660081","https://openalex.org/W4367047044","https://openalex.org/W4385570473","https://openalex.org/W4385570741","https://openalex.org/W4385571778","https://openalex.org/W4385574006","https://openalex.org/W4389519120","https://openalex.org/W4389636360","https://openalex.org/W4392453936","https://openalex.org/W4392846214","https://openalex.org/W4394717710"],"related_works":["https://openalex.org/W3016188207","https://openalex.org/W4299590256","https://openalex.org/W2151749779","https://openalex.org/W4286908490","https://openalex.org/W3179968364","https://openalex.org/W4406318200","https://openalex.org/W4393212338","https://openalex.org/W4367860523","https://openalex.org/W2417674618","https://openalex.org/W1538841260"],"abstract_inverted_index":{"Social":[0],"scientists":[1],"are":[2,109,150],"increasingly":[3],"interested":[4],"in":[5,28,62,77,136],"analyzing":[6],"the":[7,18,40,46,60,148,163],"semantic":[8,19,41],"information":[9,20,42],"(e.g.,":[10,15],"emotion)":[11],"of":[12,71,106,205],"unstructured":[13,164],"data":[14,165],"Tweets),":[16],"where":[17],"is":[21],"not":[22],"natively":[23],"present.":[24],"Performing":[25],"this":[26,51],"analysis":[27],"a":[29,68,104,187,200],"cost-efficient":[30],"manner":[31],"requires":[32],"using":[33],"machine":[34],"learning":[35],"(ML)":[36],"models":[37],"to":[38,117,145,161,166,178,180],"extract":[39],"and":[43,80,96,100,152,157,175,192],"subsequently":[44],"analyze":[45],"now":[47],"structured":[48,167],"data.":[49],"However,":[50],"process":[52],"remains":[53],"challenging":[54],"for":[55],"domain":[56],"experts.":[57],"To":[58,121],"demonstrate":[59],"challenges":[61],"social":[63,74,133],"science":[64,75,134],"analytics,":[65],"we":[66,125],"collect":[67],"dataset,":[69],"QUIET-ML,":[70,198],"120":[72],"real-world":[73],"queries":[76,90,108,135,144],"natural":[78,115,137,182],"language":[79,116,138,183],"their":[81],"ground":[82],"truth":[83],"answers.":[84],"Existing":[85],"systems":[86,119],"struggle":[87],"with":[88,139,169,199],"these":[89,107,123,181],"since":[91],"(1)":[92],"they":[93],"require":[94],"selecting":[95],"applying":[97],"ML":[98,159],"models,":[99],"(2)":[101],"more":[102],"than":[103],"quarter":[105],"vague,":[110],"making":[111],"standard":[112],"tools":[113],"like":[114],"SQL":[118],"unsuited.":[120],"address":[122],"issues,":[124],"develop":[126],"LEAP,":[127],"an":[128],"end-to-end":[129,203],"library":[130],"that":[131,147],"answers":[132,149],"ML.":[140],"LEAP":[141,172,185],"filters":[142],"vague":[143],"ensure":[146],"deterministic":[151],"selects":[153],"from":[154],"internally":[155],"supported":[156],"user-defined":[158],"functions":[160],"extend":[162],"tables":[168],"necessary":[170],"annotations.":[171],"further":[173],"generates":[174],"executes":[176],"code":[177,207],"respond":[179],"queries.":[184],"achieves":[186],"100%":[188],"pass":[189,194],"@":[190,195],"3":[191],"92%":[193],"1":[196],"on":[197],"$1.06":[201],"average":[202],"cost,":[204],"which":[206],"generation":[208],"costs":[209],"$0.02.":[210]},"counts_by_year":[],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
