{"id":"https://openalex.org/W3011554092","doi":"https://doi.org/10.1017/s1351324920000121","title":"Classification of regional and genre varieties of Chinese: A correspondence analysis approach based on comparable balanced corpora","display_name":"Classification of regional and genre varieties of Chinese: A correspondence analysis approach based on comparable balanced corpora","publication_year":2020,"publication_date":"2020-03-09","ids":{"openalex":"https://openalex.org/W3011554092","doi":"https://doi.org/10.1017/s1351324920000121","mag":"3011554092"},"language":"en","primary_location":{"id":"doi:10.1017/s1351324920000121","is_oa":false,"landing_page_url":"https://doi.org/10.1017/s1351324920000121","pdf_url":null,"source":{"id":"https://openalex.org/S18088403","display_name":"Natural Language Engineering","issn_l":"1351-3249","issn":["1351-3249","1469-8110"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Natural Language Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082120765","display_name":"Renkui Hou","orcid":null},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]},{"id":"https://openalex.org/I37987034","display_name":"Guangzhou University","ror":"https://ror.org/05ar8rn06","country_code":"CN","type":"education","lineage":["https://openalex.org/I37987034"]}],"countries":["CN","HK"],"is_corresponding":false,"raw_author_name":"Renkui Hou","raw_affiliation_strings":["Department of Chinese and Bilingual Studies, The Hong Kong Polytechnic University, Kowloon, Hong Kong","College of Humanities, Guangzhou University, Guangzhou, China"],"raw_orcid":"https://orcid.org/0000-0003-2510-6277","affiliations":[{"raw_affiliation_string":"Department of Chinese and Bilingual Studies, The Hong Kong Polytechnic University, Kowloon, Hong Kong","institution_ids":["https://openalex.org/I14243506"]},{"raw_affiliation_string":"College of Humanities, Guangzhou University, Guangzhou, China","institution_ids":["https://openalex.org/I37987034"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5024924150","display_name":"Chu\u2010Ren Huang","orcid":"https://orcid.org/0000-0002-8526-5520"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":true,"raw_author_name":"Chu-Ren Huang","raw_affiliation_strings":["Department of Chinese and Bilingual Studies, The Hong Kong Polytechnic University, Kowloon, Hong Kong"],"raw_orcid":"https://orcid.org/0000-0002-8526-5520","affiliations":[{"raw_affiliation_string":"Department of Chinese and Bilingual Studies, The Hong Kong Polytechnic University, Kowloon, Hong Kong","institution_ids":["https://openalex.org/I14243506"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5024924150"],"corresponding_institution_ids":["https://openalex.org/I14243506"],"apc_list":null,"apc_paid":null,"fwci":0.8154,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.78467402,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"26","issue":"6","first_page":"613","last_page":"640"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9908000230789185,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11640","display_name":"Linguistic Variation and Morphology","score":0.9835000038146973,"subfield":{"id":"https://openalex.org/subfields/3310","display_name":"Linguistics and Language"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8102759122848511},{"id":"https://openalex.org/keywords/mandarin-chinese","display_name":"Mandarin Chinese","score":0.7125785946846008},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6489461660385132},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5823701620101929},{"id":"https://openalex.org/keywords/mainland-china","display_name":"Mainland China","score":0.49678948521614075},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.47065892815589905},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.41635626554489136},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.39577576518058777},{"id":"https://openalex.org/keywords/china","display_name":"China","score":0.1680660843849182},{"id":"https://openalex.org/keywords/history","display_name":"History","score":0.10074254870414734}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8102759122848511},{"id":"https://openalex.org/C138954614","wikidata":"https://www.wikidata.org/wiki/Q9192","display_name":"Mandarin Chinese","level":2,"score":0.7125785946846008},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6489461660385132},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5823701620101929},{"id":"https://openalex.org/C107029721","wikidata":"https://www.wikidata.org/wiki/Q19188","display_name":"Mainland China","level":3,"score":0.49678948521614075},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.47065892815589905},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.41635626554489136},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.39577576518058777},{"id":"https://openalex.org/C191935318","wikidata":"https://www.wikidata.org/wiki/Q148","display_name":"China","level":2,"score":0.1680660843849182},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.10074254870414734},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1017/s1351324920000121","is_oa":false,"landing_page_url":"https://doi.org/10.1017/s1351324920000121","pdf_url":null,"source":{"id":"https://openalex.org/S18088403","display_name":"Natural Language Engineering","issn_l":"1351-3249","issn":["1351-3249","1469-8110"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Natural Language Engineering","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.5799999833106995,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":66,"referenced_works":["https://openalex.org/W61584101","https://openalex.org/W76669076","https://openalex.org/W86907710","https://openalex.org/W113189822","https://openalex.org/W175025436","https://openalex.org/W643183242","https://openalex.org/W962175086","https://openalex.org/W968157591","https://openalex.org/W1532868709","https://openalex.org/W1592805114","https://openalex.org/W1968826646","https://openalex.org/W1976711150","https://openalex.org/W1982697162","https://openalex.org/W1983480945","https://openalex.org/W1986636423","https://openalex.org/W2006197442","https://openalex.org/W2044340178","https://openalex.org/W2047402194","https://openalex.org/W2053782355","https://openalex.org/W2093790243","https://openalex.org/W2107251449","https://openalex.org/W2110976849","https://openalex.org/W2125860909","https://openalex.org/W2142435445","https://openalex.org/W2165417675","https://openalex.org/W2165431734","https://openalex.org/W2250568751","https://openalex.org/W2252059851","https://openalex.org/W2352423486","https://openalex.org/W2402382476","https://openalex.org/W2429628802","https://openalex.org/W2476868877","https://openalex.org/W2487270460","https://openalex.org/W2488580118","https://openalex.org/W2561747913","https://openalex.org/W2578062929","https://openalex.org/W2587610201","https://openalex.org/W2604607843","https://openalex.org/W2608065105","https://openalex.org/W2807811483","https://openalex.org/W2918032103","https://openalex.org/W2927315230","https://openalex.org/W2939097945","https://openalex.org/W2941028947","https://openalex.org/W2956292147","https://openalex.org/W2962270437","https://openalex.org/W2963960505","https://openalex.org/W3004889924","https://openalex.org/W3011170240","https://openalex.org/W3106159200","https://openalex.org/W3119615209","https://openalex.org/W3119782651","https://openalex.org/W3161552047","https://openalex.org/W3200634688","https://openalex.org/W4211087396","https://openalex.org/W4229577959","https://openalex.org/W4239075165","https://openalex.org/W4243041937","https://openalex.org/W4285719527","https://openalex.org/W6603411205","https://openalex.org/W6631617854","https://openalex.org/W6732371348","https://openalex.org/W6986627565","https://openalex.org/W6997158807","https://openalex.org/W7055868249","https://openalex.org/W7061722477"],"related_works":["https://openalex.org/W2374317326","https://openalex.org/W2990005675","https://openalex.org/W1603321096","https://openalex.org/W2394766824","https://openalex.org/W2078713291","https://openalex.org/W2361574037","https://openalex.org/W2386292991","https://openalex.org/W2364440891","https://openalex.org/W2393726922","https://openalex.org/W2366752344"],"abstract_inverted_index":{"Abstract":[0],"This":[1,239],"paper":[2],"proposes":[3],"a":[4,72],"robust":[5],"text":[6,151],"classification":[7,40,83,152,193],"and":[8,29,39,52,101,132,142,148,153,170,223],"correspondence":[9,154],"analysis":[10,155],"approach":[11],"to":[12,21,33,118,206],"identification":[13],"of":[14,27,66,69,71,104,136,177,194,216,264],"similar":[15,35,75,265],"languages.":[16,36,266],"In":[17,182,197],"particular,":[18,198],"we":[19,97,184],"propose":[20],"use":[22],"the":[23,44,61,127,133,143,159,174,212,232,235,251,256],"readily":[24],"available":[25],"information":[26],"clauses":[28,171],"word":[30,169],"length":[31],"distribution":[32],"model":[34,164],"The":[37,109,123],"modeling":[38],"are":[41,48,80,111,126,146],"based":[42,84],"on":[43,85,192],"hypothesis":[45,243],"that":[46,158,186,244],"languages":[47,76],"self-adaptive":[49],"complex":[50,213,246],"systems":[51],"hence":[53],"can":[54],"be":[55,90,207],"classified":[56,208],"by":[57,255],"dynamic":[58,86,245],"features":[59,88],"describing":[60],"system,":[62],"especially":[63],"in":[64,200,262],"terms":[65],"distributional":[67],"relations":[68],"constituents":[70],"system.":[73,238],"For":[74],"whose":[77],"grammatical":[78],"differences":[79],"often":[81],"subtle,":[82],"system":[87,214,247],"should":[89],"more":[91,204],"effective.":[92],"To":[93],"test":[94],"this":[95],"hypothesis,":[96],"considered":[98],"both":[99,180],"regional":[100,195],"genre":[102],"varieties":[103,176,202],"Mandarin":[105,137],"Chinese":[106,138,178],"for":[107,179,234],"classification.":[108],"data":[110],"extracted":[112],"from":[113,130,139],"two":[114,124,144,175,201],"comparable":[115],"balanced":[116],"corpora":[117,125],"minimize":[119],"possible":[120],"confounding":[121],"factors.":[122],"Sinica":[128],"Corpus":[129,135],"Taiwan":[131],"Lancaster":[134],"Mainland":[140],"China,":[141],"genres":[145,187],"reportage":[147,199],"review.":[149],"Our":[150],"results":[156],"show":[157],"linguistically":[160],"felicitous":[161],"two-level":[162],"constituency":[163],"combining":[165],"power":[166,252],"functions":[167,253],"between":[168],"effectively":[172],"classifies":[173],"genres.":[181],"addition,":[183],"found":[185],"do":[188,226],"have":[189],"compounding":[190],"effect":[191],"varieties.":[196],"is":[203],"likely":[205],"than":[209],"review,":[210],"corroborating":[211],"view":[215],"language":[217,221,237],"variations.":[218],"That":[219],"is,":[220],"variations":[222],"changes":[224],"typically":[225],"not":[227],"take":[228],"place":[229],"evenly":[230],"across":[231],"board":[233],"complete":[236],"further":[240],"enhances":[241],"our":[242],"features,":[248],"such":[249],"as":[250],"captured":[254],"Menzerath\u2013Altmann":[257],"law,":[258],"provide":[259],"effective":[260],"models":[261],"classifications":[263]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
