{"id":"https://openalex.org/W4417070120","doi":"https://doi.org/10.1145/3769829","title":"ST-Raptor: LLM-Powered Semi-Structured Table Question Answering","display_name":"ST-Raptor: LLM-Powered Semi-Structured Table Question Answering","publication_year":2025,"publication_date":"2025-12-04","ids":{"openalex":"https://openalex.org/W4417070120","doi":"https://doi.org/10.1145/3769829"},"language":"en","primary_location":{"id":"doi:10.1145/3769829","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3769829","pdf_url":null,"source":{"id":"https://openalex.org/S4387289859","display_name":"Proceedings of the ACM on Management of Data","issn_l":"2836-6573","issn":["2836-6573"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Management of Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5013386932","display_name":"Zhen\u2010Xing Tang","orcid":"https://orcid.org/0009-0003-3449-4248"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zirui Tang","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120474599","display_name":"Boyu Niu","orcid":"https://orcid.org/0009-0003-2343-643X"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Boyu Niu","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056912386","display_name":"Xuanhe Zhou","orcid":"https://orcid.org/0000-0002-2285-7836"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuanhe Zhou","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101511719","display_name":"Bingxue Li","orcid":"https://orcid.org/0009-0008-6313-2138"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Boxiu Li","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102722470","display_name":"Wei Zhou","orcid":"https://orcid.org/0009-0000-8862-7753"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Zhou","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064909746","display_name":"J. Wang","orcid":"https://orcid.org/0000-0002-8229-3622"},"institutions":[{"id":"https://openalex.org/I18014758","display_name":"Simon Fraser University","ror":"https://ror.org/0213rcc28","country_code":"CA","type":"education","lineage":["https://openalex.org/I18014758"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Jiannan Wang","raw_affiliation_strings":["Simon Fraser University, Vancouver, Canada"],"affiliations":[{"raw_affiliation_string":"Simon Fraser University, Vancouver, Canada","institution_ids":["https://openalex.org/I18014758"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100451576","display_name":"Guoliang Li","orcid":"https://orcid.org/0000-0002-1398-0621"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guoliang Li","raw_affiliation_strings":["Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101511155","display_name":"Xinyi Zhang","orcid":"https://orcid.org/0000-0003-1653-2485"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinyi Zhang","raw_affiliation_strings":["Renmin University of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5059190563","display_name":"Fan Wu","orcid":"https://orcid.org/0000-0003-0965-9058"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fan Wu","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5013386932"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":4.2213,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.94950861,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":"3","issue":"6","first_page":"1","last_page":"27"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.48829999566078186,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.48829999566078186,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.07020000368356705,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10799","display_name":"Data Visualization and Analytics","score":0.061799999326467514,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/table","display_name":"Table (database)","score":0.7081999778747559},{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.637499988079071},{"id":"https://openalex.org/keywords/tree","display_name":"Tree (set theory)","score":0.532800018787384},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5317999720573425},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5310999751091003},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.5299999713897705},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.5149999856948853}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8212000131607056},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.7081999778747559},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.637499988079071},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.532800018787384},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5317999720573425},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5310999751091003},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.5299999713897705},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.5149999856948853},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.47850000858306885},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4453999996185303},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.3698999881744385},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3684000074863434},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.35030001401901245},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3490999937057495},{"id":"https://openalex.org/C163797641","wikidata":"https://www.wikidata.org/wiki/Q2067937","display_name":"Tree structure","level":3,"score":0.34279999136924744},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.33469998836517334},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.3221000134944916},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.29249998927116394},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.2903999984264374},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2897000014781952},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.275299996137619},{"id":"https://openalex.org/C144986985","wikidata":"https://www.wikidata.org/wiki/Q871236","display_name":"Hierarchical database model","level":2,"score":0.2538999915122986}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3769829","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3769829","pdf_url":null,"source":{"id":"https://openalex.org/S4387289859","display_name":"Proceedings of the ACM on Management of Data","issn_l":"2836-6573","issn":["2836-6573"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Management of Data","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W2185907055","https://openalex.org/W2963899988","https://openalex.org/W3085495757","https://openalex.org/W4385572142","https://openalex.org/W4401042314","https://openalex.org/W4401042562","https://openalex.org/W4402684297","https://openalex.org/W4404250052","https://openalex.org/W4407356147","https://openalex.org/W4412888342"],"related_works":[],"abstract_inverted_index":{"Semi-structured":[0],"tables,":[1],"widely":[2],"used":[3],"in":[4,167,252],"real-world":[5,238],"applications":[6],"(e.g.,":[7,20],"financial":[8],"reports,":[9],"medical":[10],"records,":[11],"transactional":[12],"orders),":[13],"often":[14,69],"involve":[15],"flexible":[16],"and":[17,23,37,46,78,91,151,187],"complex":[18,86,133],"layouts":[19,36,87],"hierarchical":[21],"headers":[22],"merged":[24],"cells).":[25],"These":[26],"tables":[27,64,90],"generally":[28],"rely":[29],"on":[30],"human":[31],"analysts":[32],"to":[33,83,164,250],"interpret":[34],"table":[35,108,113,135],"answer":[38,94,216,253],"relevant":[39],"natural":[40],"language":[41,118],"questions,":[42],"which":[43,68],"is":[44,257],"costly":[45],"inefficient.":[47],"To":[48,97,224],"automate":[49],"the":[50,85,123,144,206,226],"procedure,":[51],"existing":[52],"methods":[53,58,75],"face":[54],"significant":[55],"challenges.":[56],"First,":[57,120],"like":[59,76],"NL2SQL":[60],"require":[61],"converting":[62],"semi-structured":[63,89,107,112,134,239],"into":[65,179],"structured":[66],"ones,":[67],"causes":[70],"substantial":[71],"information":[72],"loss.":[73],"Second,":[74,155],"NL2Code":[77],"multi-modal":[79],"LLM":[80],"QA":[81,114,170],"struggle":[82],"understand":[84],"of":[88,160,208,233],"cannot":[92],"accurately":[93],"corresponding":[95,183],"questions.":[96],"this":[98],"end,":[99],"we":[100,121,156,196,228],"propose":[101],"ST-Raptor,":[102],"a":[103,128,158,173,198,231],"tree-based":[104],"framework":[105],"for":[106,142,191],"question":[109],"answering":[110],"(":[111],")":[115],"using":[116],"large":[117],"models.":[119],"introduce":[122],"Hierarchical":[124],"Orthogonal":[125],"Tree":[126],"(HO-Tree),":[127],"structural":[129],"model":[130],"that":[131,243],"captures":[132],"layouts,":[136],"along":[137],"with":[138],"an":[139],"effective":[140],"algorithm":[141],"constructing":[143],"tree":[145,162,184],"by":[146,218,248],"identifying":[147],"headers,":[148],"content":[149],"values,":[150],"their":[152],"implicit":[153],"relationships.":[154],"define":[157],"set":[159],"basic":[161],"operations":[163],"guide":[165],"LLMs":[166],"executing":[168],"common":[169],"tasks.":[171],"Given":[172],"user":[174],"question,":[175],"ST-Raptor":[176,244],"decomposes":[177],"it":[178],"simpler":[180],"sub-questions,":[181],"generates":[182],"operation":[185],"pipelines,":[186],"conducts":[188],"operation-table":[189],"alignment":[190],"accurate":[192],"pipeline":[193],"execution.":[194],"Third,":[195],"incorporate":[197],"two-stage":[199],"verification":[200],"mechanism:":[201],"(1)":[202],"forward":[203],"validation":[204,214],"checks":[205],"correctness":[207],"execution":[209],"steps,":[210],"while":[211],"(2)":[212],"backward":[213],"evaluates":[215],"reliability":[217],"reconstructing":[219],"queries":[220],"from":[221],"predicted":[222],"answers.":[223],"benchmark":[225],"performance,":[227],"present":[229],"SSTQA,":[230],"dataset":[232],"764":[234],"questions":[235],"over":[236],"102":[237],"tables.":[240],"Experiments":[241],"show":[242],"outperforms":[245],"nine":[246],"baselines":[247],"up":[249],"20%":[251],"accuracy.":[254],"The":[255],"code":[256],"available":[258],"at":[259],"https://github.com/weAIDB/ST-Raptor.":[260]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-12-06T00:00:00"}
