{"id":"https://openalex.org/W7118589389","doi":"https://doi.org/10.48550/arxiv.2601.01415","title":"A Tool for Semantic-Aware Spatial Corpus Construction","display_name":"A Tool for Semantic-Aware Spatial Corpus Construction","publication_year":2026,"publication_date":"2026-01-04","ids":{"openalex":"https://openalex.org/W7118589389","doi":"https://doi.org/10.48550/arxiv.2601.01415"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2601.01415","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.01415","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2601.01415","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5122080126","display_name":"Wei Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Huang, Wei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122275171","display_name":"Xieyang Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Xieyang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082787636","display_name":"Jianqiu Xu","orcid":"https://orcid.org/0000-0002-0929-5234"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Jianqiu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5122064027","display_name":"Guidong Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Guidong","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5122080126"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11596","display_name":"Constraint Satisfaction and Optimization","score":0.635699987411499,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11596","display_name":"Constraint Satisfaction and Optimization","score":0.635699987411499,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.15410000085830688,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10757","display_name":"Geographic Information Systems Studies","score":0.10779999941587448,"subfield":{"id":"https://openalex.org/subfields/3305","display_name":"Geography, Planning and Development"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/executable","display_name":"Executable","score":0.6370000243186951},{"id":"https://openalex.org/keywords/rdf-query-language","display_name":"RDF query language","score":0.5712000131607056},{"id":"https://openalex.org/keywords/query-language","display_name":"Query language","score":0.5271999835968018},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.5202000141143799},{"id":"https://openalex.org/keywords/natural-language-user-interface","display_name":"Natural language user interface","score":0.5090000033378601},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.4715999960899353},{"id":"https://openalex.org/keywords/spatial-analysis","display_name":"Spatial analysis","score":0.4546999931335449},{"id":"https://openalex.org/keywords/spatial-query","display_name":"Spatial query","score":0.44999998807907104}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8098000288009644},{"id":"https://openalex.org/C160145156","wikidata":"https://www.wikidata.org/wiki/Q778586","display_name":"Executable","level":2,"score":0.6370000243186951},{"id":"https://openalex.org/C96956885","wikidata":"https://www.wikidata.org/wiki/Q6138701","display_name":"RDF query language","level":5,"score":0.5712000131607056},{"id":"https://openalex.org/C192028432","wikidata":"https://www.wikidata.org/wiki/Q845739","display_name":"Query language","level":2,"score":0.5271999835968018},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.5202000141143799},{"id":"https://openalex.org/C174252522","wikidata":"https://www.wikidata.org/wiki/Q3816772","display_name":"Natural language user interface","level":3,"score":0.5090000033378601},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.4715999960899353},{"id":"https://openalex.org/C159620131","wikidata":"https://www.wikidata.org/wiki/Q1938983","display_name":"Spatial analysis","level":2,"score":0.4546999931335449},{"id":"https://openalex.org/C172722865","wikidata":"https://www.wikidata.org/wiki/Q2302053","display_name":"Spatial query","level":5,"score":0.44999998807907104},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.448199987411499},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.44279998540878296},{"id":"https://openalex.org/C203689450","wikidata":"https://www.wikidata.org/wiki/Q2302053","display_name":"Spatial database","level":3,"score":0.42590001225471497},{"id":"https://openalex.org/C2776187449","wikidata":"https://www.wikidata.org/wiki/Q1513879","display_name":"Natural language generation","level":3,"score":0.42179998755455017},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41260001063346863},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.38510000705718994},{"id":"https://openalex.org/C157692150","wikidata":"https://www.wikidata.org/wiki/Q2919848","display_name":"Query optimization","level":2,"score":0.38089999556541443},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.37450000643730164},{"id":"https://openalex.org/C27511587","wikidata":"https://www.wikidata.org/wiki/Q2178623","display_name":"Spatial relation","level":2,"score":0.36250001192092896},{"id":"https://openalex.org/C32977378","wikidata":"https://www.wikidata.org/wiki/Q604737","display_name":"Data control language","level":5,"score":0.35899999737739563},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.3314000070095062},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.30880001187324524},{"id":"https://openalex.org/C68859911","wikidata":"https://www.wikidata.org/wiki/Q1503724","display_name":"Pattern matching","level":2,"score":0.2879999876022339},{"id":"https://openalex.org/C42058472","wikidata":"https://www.wikidata.org/wiki/Q810214","display_name":"Base (topology)","level":2,"score":0.2808000147342682},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.27889999747276306},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2759999930858612},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.26499998569488525}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2601.01415","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.01415","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2601.01415","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.01415","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Spatial":[0],"natural":[1,17,26,73,172],"language":[2,18,27,74,78,173],"interface":[3,174],"to":[4,13,132],"database":[5],"systems":[6],"provide":[7],"non-expert":[8],"users":[9],"with":[10],"convenient":[11],"access":[12],"spatial":[14,25,62,72,96,102,133,138,171],"data":[15],"through":[16],"queries.":[19],"However,":[20],"the":[21,31,136],"scarcity":[22],"of":[23,33,84],"high-quality":[24,71,167],"query":[28,75,79,110,117,162],"corpora":[29],"limits":[30],"performance":[32],"such":[34],"systems.":[35],"Existing":[36],"methods":[37],"rely":[38],"on":[39,95],"manual":[40],"knowledge":[41,90,152],"base":[42,91,153],"construction":[43,52,64,92,154],"and":[44,54,76,100,106,122,130,155,180],"template-based":[45],"dynamic":[46],"generation,":[47],"which":[48,98,115],"suffer":[49],"from":[50,104],"low":[51],"efficiency":[53,149],"unstable":[55],"corpus":[56,63,112,168,184],"quality.":[57],"This":[58],"paper":[59],"presents":[60],"semantic-aware":[61],"(SSCC),":[65],"a":[66,89,108,147,157],"tool":[67,126],"designed":[68],"for":[69,151,161,170],"constructing":[70],"executable":[77],"pair":[80,111,163],"corpora.":[81],"SSCC":[82,144,165],"consists":[83],"two":[85],"core":[86],"modules:":[87],"(i)":[88,146],"module":[93],"based":[94],"relations,":[97],"extracts":[99],"determines":[101],"relations":[103],"datasets,":[105],"(ii)":[107,156],"template-augmented":[109],"generation":[113],"module,":[114],"produces":[116],"pairs":[118],"via":[119],"template":[120],"matching":[121],"parameter":[123],"substitution.":[124],"The":[125],"ensures":[127],"geometric":[128],"consistency":[129],"adherence":[131],"logic":[134],"in":[135,183],"generated":[137],"relations.":[139],"Experimental":[140],"results":[141],"demonstrate":[142],"that":[143],"achieves":[145],"53x":[148],"improvement":[150,160],"2.5x":[158],"effectiveness":[159],"corpus.":[164],"provides":[166],"support":[169],"training,":[175],"substantially":[176],"reducing":[177],"both":[178],"time":[179],"labor":[181],"costs":[182],"construction.":[185]},"counts_by_year":[],"updated_date":"2026-01-08T20:10:11.968330","created_date":"2026-01-08T00:00:00"}
