{"id":"https://openalex.org/W4322767884","doi":"https://doi.org/10.1080/19475683.2023.2186487","title":"BERTCWS: unsupervised multi-granular Chinese word segmentation based on a BERT method for the geoscience domain","display_name":"BERTCWS: unsupervised multi-granular Chinese word segmentation based on a BERT method for the geoscience domain","publication_year":2023,"publication_date":"2023-03-02","ids":{"openalex":"https://openalex.org/W4322767884","doi":"https://doi.org/10.1080/19475683.2023.2186487"},"language":"en","primary_location":{"id":"doi:10.1080/19475683.2023.2186487","is_oa":true,"landing_page_url":"https://doi.org/10.1080/19475683.2023.2186487","pdf_url":null,"source":{"id":"https://openalex.org/S4210199948","display_name":"Annals of GIS","issn_l":"1947-5683","issn":["1947-5683","1947-5691"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320547","host_organization_name":"Taylor & Francis","host_organization_lineage":["https://openalex.org/P4310320547"],"host_organization_lineage_names":["Taylor & Francis"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Annals of GIS","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1080/19475683.2023.2186487","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5063543521","display_name":"Qinjun Qiu","orcid":"https://orcid.org/0000-0002-9850-3751"},"institutions":[{"id":"https://openalex.org/I3124059619","display_name":"China University of Geosciences","ror":"https://ror.org/04gcegc37","country_code":"CN","type":"education","lineage":["https://openalex.org/I3124059619"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Qinjun Qiu","raw_affiliation_strings":["Hubei Key Laboratory of Intelligent Geo-Information Processing, China University of Geosciences, Wuhan, China","School of Computer Science, China University of Geosciences, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"Hubei Key Laboratory of Intelligent Geo-Information Processing, China University of Geosciences, Wuhan, China","institution_ids":["https://openalex.org/I3124059619"]},{"raw_affiliation_string":"School of Computer Science, China University of Geosciences, Wuhan, China","institution_ids":["https://openalex.org/I3124059619"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100457293","display_name":"Zhong Xie","orcid":"https://orcid.org/0000-0002-4669-5923"},"institutions":[{"id":"https://openalex.org/I3124059619","display_name":"China University of Geosciences","ror":"https://ror.org/04gcegc37","country_code":"CN","type":"education","lineage":["https://openalex.org/I3124059619"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhong Xie","raw_affiliation_strings":["Hubei Key Laboratory of Intelligent Geo-Information Processing, China University of Geosciences, Wuhan, China","School of Computer Science, China University of Geosciences, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"Hubei Key Laboratory of Intelligent Geo-Information Processing, China University of Geosciences, Wuhan, China","institution_ids":["https://openalex.org/I3124059619"]},{"raw_affiliation_string":"School of Computer Science, China University of Geosciences, Wuhan, China","institution_ids":["https://openalex.org/I3124059619"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014532726","display_name":"Kai Ma","orcid":"https://orcid.org/0000-0001-5432-1166"},"institutions":[{"id":"https://openalex.org/I161350542","display_name":"China Three Gorges University","ror":"https://ror.org/0419nfc77","country_code":"CN","type":"education","lineage":["https://openalex.org/I161350542"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kai Ma","raw_affiliation_strings":["College of Computer and Information Technology, China Three Gorges University, Yichang, China","Hubei Key Laboratory of Intelligent Vision Based Monitoring for Hydroelectric Engineering, China Three Gorges University, Yichang, Hubei, China"],"affiliations":[{"raw_affiliation_string":"College of Computer and Information Technology, China Three Gorges University, Yichang, China","institution_ids":["https://openalex.org/I161350542"]},{"raw_affiliation_string":"Hubei Key Laboratory of Intelligent Vision Based Monitoring for Hydroelectric Engineering, China Three Gorges University, Yichang, Hubei, China","institution_ids":["https://openalex.org/I161350542"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034879141","display_name":"Miao Tian","orcid":"https://orcid.org/0000-0002-6800-9760"},"institutions":[{"id":"https://openalex.org/I161350542","display_name":"China Three Gorges University","ror":"https://ror.org/0419nfc77","country_code":"CN","type":"education","lineage":["https://openalex.org/I161350542"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Miao Tian","raw_affiliation_strings":["College of Computer and Information Technology, China Three Gorges University, Yichang, China","Hubei Key Laboratory of Intelligent Vision Based Monitoring for Hydroelectric Engineering, China Three Gorges University, Yichang, Hubei, China"],"affiliations":[{"raw_affiliation_string":"College of Computer and Information Technology, China Three Gorges University, Yichang, China","institution_ids":["https://openalex.org/I161350542"]},{"raw_affiliation_string":"Hubei Key Laboratory of Intelligent Vision Based Monitoring for Hydroelectric Engineering, China Three Gorges University, Yichang, Hubei, China","institution_ids":["https://openalex.org/I161350542"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5063543521"],"corresponding_institution_ids":["https://openalex.org/I3124059619"],"apc_list":{"value":1500,"currency":"USD","value_usd":1500},"apc_paid":{"value":1500,"currency":"USD","value_usd":1500},"fwci":1.0454,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.80186859,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"29","issue":"3","first_page":"387","last_page":"399"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9814000129699707,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7696342468261719},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.7202174067497253},{"id":"https://openalex.org/keywords/text-segmentation","display_name":"Text segmentation","score":0.6569312810897827},{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.6156688332557678},{"id":"https://openalex.org/keywords/discriminator","display_name":"Discriminator","score":0.6050992012023926},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5663361549377441},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.520435631275177},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5127533674240112},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.4879262149333954},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.4183996021747589},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3409503102302551},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.18778112530708313},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11421868205070496}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7696342468261719},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.7202174067497253},{"id":"https://openalex.org/C98501671","wikidata":"https://www.wikidata.org/wiki/Q1948408","display_name":"Text segmentation","level":3,"score":0.6569312810897827},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.6156688332557678},{"id":"https://openalex.org/C2779803651","wikidata":"https://www.wikidata.org/wiki/Q5282088","display_name":"Discriminator","level":3,"score":0.6050992012023926},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5663361549377441},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.520435631275177},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5127533674240112},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.4879262149333954},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.4183996021747589},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3409503102302551},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.18778112530708313},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11421868205070496},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1080/19475683.2023.2186487","is_oa":true,"landing_page_url":"https://doi.org/10.1080/19475683.2023.2186487","pdf_url":null,"source":{"id":"https://openalex.org/S4210199948","display_name":"Annals of GIS","issn_l":"1947-5683","issn":["1947-5683","1947-5691"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320547","host_organization_name":"Taylor & Francis","host_organization_lineage":["https://openalex.org/P4310320547"],"host_organization_lineage_names":["Taylor & Francis"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Annals of GIS","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:aaeb6a1c3e3e4ffba25b669919800e80","is_oa":true,"landing_page_url":"https://doaj.org/article/aaeb6a1c3e3e4ffba25b669919800e80","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Annals of GIS, Vol 29, Iss 3, Pp 387-399 (2023)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1080/19475683.2023.2186487","is_oa":true,"landing_page_url":"https://doi.org/10.1080/19475683.2023.2186487","pdf_url":null,"source":{"id":"https://openalex.org/S4210199948","display_name":"Annals of GIS","issn_l":"1947-5683","issn":["1947-5683","1947-5691"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320547","host_organization_name":"Taylor & Francis","host_organization_lineage":["https://openalex.org/P4310320547"],"host_organization_lineage_names":["Taylor & Francis"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Annals of GIS","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.6499999761581421,"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10"}],"awards":[{"id":"https://openalex.org/G3564949726","display_name":null,"funder_award_id":"U1711267","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6887970928","display_name":null,"funder_award_id":"41871305","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7805900907","display_name":null,"funder_award_id":"41871311","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W1975724541","https://openalex.org/W1979285701","https://openalex.org/W1982498087","https://openalex.org/W2005244073","https://openalex.org/W2052061796","https://openalex.org/W2166987457","https://openalex.org/W2408754830","https://openalex.org/W2541833187","https://openalex.org/W2788824299","https://openalex.org/W2790981989","https://openalex.org/W2891555348","https://openalex.org/W2892118517","https://openalex.org/W2896457183","https://openalex.org/W2911855264","https://openalex.org/W2913726998","https://openalex.org/W2944729560","https://openalex.org/W2947990748","https://openalex.org/W2962885853","https://openalex.org/W2968187429","https://openalex.org/W3034503989","https://openalex.org/W3081260973","https://openalex.org/W4295126626"],"related_works":["https://openalex.org/W2357339972","https://openalex.org/W2161919705","https://openalex.org/W2072278013","https://openalex.org/W2951061418","https://openalex.org/W2374337513","https://openalex.org/W2398978578","https://openalex.org/W2972846703","https://openalex.org/W2804033347","https://openalex.org/W2354180811","https://openalex.org/W2358684813"],"abstract_inverted_index":{"Unlike":[0],"alphabet-based":[1],"languages":[2],"such":[3],"as":[4],"English,":[5],"the":[6,17,34,56,96,102,130,136,139],"Chinese":[7,18,25,39,84],"language":[8],"has":[9],"no":[10],"specifying":[11],"word":[12,40,85],"boundaries.":[13],"Segmentation,":[14],"particularly":[15],"for":[16,82],"language,":[19],"is":[20,87,123,132],"a":[21,44,48,73,113,164],"fundamental":[22],"step":[23],"towards":[24],"text":[26],"processing,":[27],"information":[28],"retrieval,":[29],"and":[30,47,55,76,120,153],"knowledge":[31],"discovery.":[32],"In":[33],"geoscience":[35,109,168],"domain,":[36],"most":[37],"existing":[38],"segmentation":[41,57,86,122,158],"tools/models":[42],"require":[43],"prespecified":[45],"dictionary":[46],"large":[49],"amount":[50],"of":[51,138,166,170],"relevant":[52],"training":[53],"corpus,":[54],"accuracies":[58],"drop":[59],"significantly":[60],"when":[61],"processing":[62],"out-domain":[63],"situations":[64],"using":[65],"these":[66],"same":[67],"methods.":[68],"To":[69],"address":[70],"this":[71],"issue,":[72],"purely":[74],"unsupervised":[75],"generic":[77],"two-stage":[78],"architecture":[79],"(named":[80],"BERTCWS)":[81],"domain-specific":[83],"proposed.":[88],"We":[89],"first":[90],"design":[91],"an":[92],"incidence":[93],"matrix":[94],"termed":[95],"\u2018character":[97],"combination":[98],"tightness\u2019":[99],"to":[100,134,162],"calculate":[101],"closeness":[103],"between":[104],"characters.":[105],"Then,":[106],"BERTCWS":[107,147],"recognizes":[108],"terms":[110,152,169],"based":[111],"on":[112],"Bidirectional":[114],"Encoder":[115],"Representations":[116],"from":[117],"Transformers(BERT)-based":[118],"segmenter,":[119],"multi-granular":[121,157],"generated":[124],"by":[125],"setting":[126],"different":[127],"thresholds.":[128],"Finally,":[129],"discriminator":[131],"constructed":[133],"validate":[135],"correctness":[137],"segmented":[140],"words.":[141],"Our":[142],"numerical":[143],"study":[144],"demonstrates":[145],"that":[146],"can":[148],"identify":[149],"both":[150],"general-domain":[151],"geoscience-domain":[154],"terms.":[155],"Additionally,":[156],"could":[159],"be":[160],"applied":[161],"offer":[163],"set":[165],"potential":[167],"various":[171],"lengths.":[172]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1}],"updated_date":"2026-02-28T09:26:25.869077","created_date":"2025-10-10T00:00:00"}
