{"id":"https://openalex.org/W2945136398","doi":"https://doi.org/10.1007/s11390-019-1923-y","title":"A Large Chinese Text Dataset in the Wild","display_name":"A Large Chinese Text Dataset in the Wild","publication_year":2019,"publication_date":"2019-05-01","ids":{"openalex":"https://openalex.org/W2945136398","doi":"https://doi.org/10.1007/s11390-019-1923-y","mag":"2945136398"},"language":"en","primary_location":{"id":"doi:10.1007/s11390-019-1923-y","is_oa":false,"landing_page_url":"https://doi.org/10.1007/s11390-019-1923-y","pdf_url":null,"source":{"id":"https://openalex.org/S161516442","display_name":"Journal of Computer Science and Technology","issn_l":"1000-9000","issn":["1000-9000","1860-4749"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Computer Science and Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087755974","display_name":"Tailing Yuan","orcid":"https://orcid.org/0000-0002-6119-8829"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Tai-Ling Yuan","raw_affiliation_strings":["Department of Computer Science and Technology, Tsinghua University, Beijing, 100084, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, 100084, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102748017","display_name":"Zhe Zhu","orcid":"https://orcid.org/0000-0001-7315-9547"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhe Zhu","raw_affiliation_strings":["Department of Radiology, Duke University, Durham, NC, 27708, U.S.A"],"affiliations":[{"raw_affiliation_string":"Department of Radiology, Duke University, Durham, NC, 27708, U.S.A","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029335824","display_name":"Kun Xu","orcid":"https://orcid.org/0000-0002-2671-4170"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kun Xu","raw_affiliation_strings":["Department of Computer Science and Technology, Tsinghua University, Beijing, 100084, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, 100084, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025442534","display_name":"LI Cheng-jun","orcid":null},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Cheng-Jun Li","raw_affiliation_strings":["Tencent Technology (Beijing) Co. Ltd., Beijing, 100080, China"],"affiliations":[{"raw_affiliation_string":"Tencent Technology (Beijing) Co. Ltd., Beijing, 100080, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067592804","display_name":"Tai\u2010Jiang Mu","orcid":"https://orcid.org/0000-0002-9197-346X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tai-Jiang Mu","raw_affiliation_strings":["Department of Computer Science and Technology, Tsinghua University, Beijing, 100084, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, 100084, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5037233582","display_name":"Shi\u2010Min Hu","orcid":"https://orcid.org/0000-0001-7507-6542"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shi-Min Hu","raw_affiliation_strings":["Department of Computer Science and Technology, Tsinghua University, Beijing, 100084, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Technology, Tsinghua University, Beijing, 100084, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5087755974"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":{"value":2290,"currency":"EUR","value_usd":2890},"apc_paid":null,"fwci":4.6977,"has_fulltext":false,"cited_by_count":116,"citation_normalized_percentile":{"value":0.95955074,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":"34","issue":"3","first_page":"509","last_page":"521"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12707","display_name":"Vehicle License Plate Recognition","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.992900013923645,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8485325574874878},{"id":"https://openalex.org/keywords/character","display_name":"Character (mathematics)","score":0.7771207094192505},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6755048036575317},{"id":"https://openalex.org/keywords/optical-character-recognition","display_name":"Optical character recognition","score":0.618018627166748},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.605536162853241},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5374909043312073},{"id":"https://openalex.org/keywords/bounding-overwatch","display_name":"Bounding overwatch","score":0.5306313037872314},{"id":"https://openalex.org/keywords/chinese-characters","display_name":"Chinese characters","score":0.4973895847797394},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4744773209095001},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4678652286529541},{"id":"https://openalex.org/keywords/text-detection","display_name":"Text detection","score":0.4544612765312195},{"id":"https://openalex.org/keywords/character-encoding","display_name":"Character encoding","score":0.43904614448547363},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.415925532579422},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.37458670139312744},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.33826857805252075}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8485325574874878},{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.7771207094192505},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6755048036575317},{"id":"https://openalex.org/C546480517","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Optical character recognition","level":3,"score":0.618018627166748},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.605536162853241},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5374909043312073},{"id":"https://openalex.org/C63584917","wikidata":"https://www.wikidata.org/wiki/Q333286","display_name":"Bounding overwatch","level":2,"score":0.5306313037872314},{"id":"https://openalex.org/C2781051154","wikidata":"https://www.wikidata.org/wiki/Q8201","display_name":"Chinese characters","level":2,"score":0.4973895847797394},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4744773209095001},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4678652286529541},{"id":"https://openalex.org/C2983589003","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Text detection","level":3,"score":0.4544612765312195},{"id":"https://openalex.org/C32717103","wikidata":"https://www.wikidata.org/wiki/Q184759","display_name":"Character encoding","level":3,"score":0.43904614448547363},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.415925532579422},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.37458670139312744},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.33826857805252075},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s11390-019-1923-y","is_oa":false,"landing_page_url":"https://doi.org/10.1007/s11390-019-1923-y","pdf_url":null,"source":{"id":"https://openalex.org/S161516442","display_name":"Journal of Computer Science and Technology","issn_l":"1000-9000","issn":["1000-9000","1860-4749"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Computer Science and Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Zero hunger","score":0.5600000023841858,"id":"https://metadata.un.org/sdg/2"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W1488125194","https://openalex.org/W1491389626","https://openalex.org/W1579279110","https://openalex.org/W1607307044","https://openalex.org/W1861492603","https://openalex.org/W1972065312","https://openalex.org/W1981283549","https://openalex.org/W1985846974","https://openalex.org/W1998042868","https://openalex.org/W2037227137","https://openalex.org/W2049951199","https://openalex.org/W2064245055","https://openalex.org/W2067749276","https://openalex.org/W2069472161","https://openalex.org/W2078997308","https://openalex.org/W2083954025","https://openalex.org/W2097117768","https://openalex.org/W2099247484","https://openalex.org/W2108598243","https://openalex.org/W2124404372","https://openalex.org/W2135231474","https://openalex.org/W2137857605","https://openalex.org/W2142159465","https://openalex.org/W2163605009","https://openalex.org/W2193145675","https://openalex.org/W2194187530","https://openalex.org/W2194775991","https://openalex.org/W2253806798","https://openalex.org/W2335728318","https://openalex.org/W2339589954","https://openalex.org/W2479866714","https://openalex.org/W2507296351","https://openalex.org/W2519818067","https://openalex.org/W2520000195","https://openalex.org/W2550687635","https://openalex.org/W2570343428","https://openalex.org/W2605982830","https://openalex.org/W2962773189","https://openalex.org/W2962798895","https://openalex.org/W2964065044","https://openalex.org/W3106250896"],"related_works":["https://openalex.org/W2384047089","https://openalex.org/W2099085593","https://openalex.org/W1816713494","https://openalex.org/W2024883617","https://openalex.org/W2353987689","https://openalex.org/W2381492279","https://openalex.org/W2885707217","https://openalex.org/W4306707571","https://openalex.org/W3179897446","https://openalex.org/W182205727"],"abstract_inverted_index":null,"counts_by_year":[{"year":2026,"cited_by_count":5},{"year":2025,"cited_by_count":21},{"year":2024,"cited_by_count":16},{"year":2023,"cited_by_count":28},{"year":2022,"cited_by_count":21},{"year":2021,"cited_by_count":12},{"year":2020,"cited_by_count":10},{"year":2019,"cited_by_count":3}],"updated_date":"2026-03-18T14:38:29.013473","created_date":"2025-10-10T00:00:00"}
