{"id":"https://openalex.org/W2234779324","doi":"https://doi.org/10.1145/2837689.2837691","title":"Spatial characteristics of a large web n-gram corpus","display_name":"Spatial characteristics of a large web n-gram corpus","publication_year":2015,"publication_date":"2015-11-26","ids":{"openalex":"https://openalex.org/W2234779324","doi":"https://doi.org/10.1145/2837689.2837691","mag":"2234779324"},"language":"en","primary_location":{"id":"doi:10.1145/2837689.2837691","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2837689.2837691","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 9th Workshop on Geographic Information Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5070657764","display_name":"Jerome Sautier","orcid":null},"institutions":[{"id":"https://openalex.org/I202697423","display_name":"University of Zurich","ror":"https://ror.org/02crff812","country_code":"CH","type":"education","lineage":["https://openalex.org/I202697423"]}],"countries":["CH"],"is_corresponding":true,"raw_author_name":"Jerome Sautier","raw_affiliation_strings":["University of Zurich"],"affiliations":[{"raw_affiliation_string":"University of Zurich","institution_ids":["https://openalex.org/I202697423"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5054560475","display_name":"Curdin Derungs","orcid":"https://orcid.org/0000-0001-7334-4427"},"institutions":[{"id":"https://openalex.org/I202697423","display_name":"University of Zurich","ror":"https://ror.org/02crff812","country_code":"CH","type":"education","lineage":["https://openalex.org/I202697423"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Curdin Derungs","raw_affiliation_strings":["University of Zurich"],"affiliations":[{"raw_affiliation_string":"University of Zurich","institution_ids":["https://openalex.org/I202697423"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5070657764"],"corresponding_institution_ids":["https://openalex.org/I202697423"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.14694093,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"2"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10757","display_name":"Geographic Information Systems Studies","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/3305","display_name":"Geography, Planning and Development"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10757","display_name":"Geographic Information Systems Studies","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/3305","display_name":"Geography, Planning and Development"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.989300012588501,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9879999756813049,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7662332057952881},{"id":"https://openalex.org/keywords/ambiguity","display_name":"Ambiguity","score":0.7294028997421265},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.6309618949890137},{"id":"https://openalex.org/keywords/n-gram","display_name":"n-gram","score":0.6054710149765015},{"id":"https://openalex.org/keywords/spatial-relation","display_name":"Spatial relation","score":0.5382418632507324},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.47820091247558594},{"id":"https://openalex.org/keywords/spatial-analysis","display_name":"Spatial analysis","score":0.4379427433013916},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.42835670709609985},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.42781898379325867},{"id":"https://openalex.org/keywords/index","display_name":"Index (typography)","score":0.42710384726524353},{"id":"https://openalex.org/keywords/toponymy","display_name":"Toponymy","score":0.4139668643474579},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.3185245096683502},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.2311633825302124},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.18311798572540283},{"id":"https://openalex.org/keywords/remote-sensing","display_name":"Remote sensing","score":0.07931345701217651}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7662332057952881},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.7294028997421265},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.6309618949890137},{"id":"https://openalex.org/C117884012","wikidata":"https://www.wikidata.org/wiki/Q94489","display_name":"n-gram","level":3,"score":0.6054710149765015},{"id":"https://openalex.org/C27511587","wikidata":"https://www.wikidata.org/wiki/Q2178623","display_name":"Spatial relation","level":2,"score":0.5382418632507324},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.47820091247558594},{"id":"https://openalex.org/C159620131","wikidata":"https://www.wikidata.org/wiki/Q1938983","display_name":"Spatial analysis","level":2,"score":0.4379427433013916},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42835670709609985},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.42781898379325867},{"id":"https://openalex.org/C2777382242","wikidata":"https://www.wikidata.org/wiki/Q6017816","display_name":"Index (typography)","level":2,"score":0.42710384726524353},{"id":"https://openalex.org/C116856471","wikidata":"https://www.wikidata.org/wiki/Q485762","display_name":"Toponymy","level":2,"score":0.4139668643474579},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3185245096683502},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2311633825302124},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.18311798572540283},{"id":"https://openalex.org/C62649853","wikidata":"https://www.wikidata.org/wiki/Q199687","display_name":"Remote sensing","level":1,"score":0.07931345701217651},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/2837689.2837691","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2837689.2837691","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 9th Workshop on Geographic Information Retrieval","raw_type":"proceedings-article"},{"id":"pmh:oai:www.zora.uzh.ch:120412","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306401281","display_name":"Zurich Open Repository and Archive (University of Zurich)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I202697423","host_organization_name":"University of Zurich","host_organization_lineage":["https://openalex.org/I202697423"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"Sautier, Jerome; Derungs, Curdin  (2015). Spatial characteristics of a large web n-gram corpus.  In: GIR '15 9th Workshop on Geographic Information Retrieval, Paris, 26 November 2015 - 27 November 2015. ACM Digital Library, online.","raw_type":"Conference or Workshop Item"},{"id":"doi:10.5167/uzh-120412","is_oa":true,"landing_page_url":"https://doi.org/10.5167/uzh-120412","pdf_url":null,"source":{"id":"https://openalex.org/S7407051291","display_name":"Universit\u00e4t Z\u00fcrich, ZORA","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":""}],"best_oa_location":{"id":"pmh:oai:www.zora.uzh.ch:120412","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306401281","display_name":"Zurich Open Repository and Archive (University of Zurich)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I202697423","host_organization_name":"University of Zurich","host_organization_lineage":["https://openalex.org/I202697423"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"Sautier, Jerome; Derungs, Curdin  (2015). Spatial characteristics of a large web n-gram corpus.  In: GIR '15 9th Workshop on Geographic Information Retrieval, Paris, 26 November 2015 - 27 November 2015. ACM Digital Library, online.","raw_type":"Conference or Workshop Item"},"sustainable_development_goals":[{"score":0.699999988079071,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":5,"referenced_works":["https://openalex.org/W2032115636","https://openalex.org/W2116597721","https://openalex.org/W2145833060","https://openalex.org/W2885762459","https://openalex.org/W3149302861"],"related_works":["https://openalex.org/W2353179089","https://openalex.org/W3172220373","https://openalex.org/W4223915615","https://openalex.org/W2181412350","https://openalex.org/W2737684396","https://openalex.org/W2520082489","https://openalex.org/W2362913948","https://openalex.org/W2116266067","https://openalex.org/W2364189591","https://openalex.org/W2028237718"],"abstract_inverted_index":{"N-gram":[0],"corpora,":[1,12],"though":[2],"prominently":[3],"used":[4],"to":[5,99],"structure":[6],"and":[7,47,55,72],"index":[8],"large":[9,36],"natural":[10],"language":[11],"are":[13,50,78],"rarely":[14],"in":[15,27,34,52],"the":[16],"focus":[17],"of":[18],"GIR.":[19],"In":[20],"this":[21,28,53],"study":[22],"we":[23],"describe":[24],"a":[25,35],"step":[26],"direction":[29],"by":[30,41],"characterizing":[31],"spatial":[32,96],"information":[33,97],"Web":[37],"n-gram":[38],"corpus":[39,54],"provided":[40],"Microsoft.":[42],"We":[43,85],"explore":[44],"how":[45],"continent":[46],"country":[48],"toponyms":[49],"represented":[51],"if":[56,92],"basic":[57],"topological":[58,76],"relations":[59,77],"can":[60],"be":[61,100],"correctly":[62],"retrieved.":[63],"Results":[64],"suggest":[65],"that":[66,73,87],"toponym":[67],"ambiguity":[68],"has":[69],"major":[70],"impact":[71],"although":[74],"retrieved":[75,101],"often":[79],"correct,":[80],"recall":[81],"is":[82,90,98],"considerably":[83],"low.":[84],"conclude":[86],"further":[88],"research":[89],"required":[91],"more":[93],"fine":[94],"grained":[95],"from":[102],"n-grams.":[103]},"counts_by_year":[],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
