{"id":"https://openalex.org/W4404225537","doi":"https://doi.org/10.3390/data9110134","title":"The Design of a Script Identification Algorithm and Its Application in Constructing a Text Language Identification Dataset","display_name":"The Design of a Script Identification Algorithm and Its Application in Constructing a Text Language Identification Dataset","publication_year":2024,"publication_date":"2024-11-11","ids":{"openalex":"https://openalex.org/W4404225537","doi":"https://doi.org/10.3390/data9110134"},"language":"en","primary_location":{"id":"doi:10.3390/data9110134","is_oa":true,"landing_page_url":"https://doi.org/10.3390/data9110134","pdf_url":null,"source":{"id":"https://openalex.org/S4210226510","display_name":"Data","issn_l":"2306-5729","issn":["2306-5729"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.3390/data9110134","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114588364","display_name":"Mamtimin Qasim","orcid":"https://orcid.org/0000-0002-8860-951X"},"institutions":[{"id":"https://openalex.org/I4210106134","display_name":"Guangzhou Vocational College of Science and Technology","ror":"https://ror.org/01dan7p53","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210106134"]},{"id":"https://openalex.org/I4400600917","display_name":"Guangzhou College of Commerce","ror":"https://ror.org/04f0j5d06","country_code":null,"type":"education","lineage":["https://openalex.org/I4400600917"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Mamtimin Qasim","raw_affiliation_strings":["School of Information Technology and Engineering, Guangzhou College of Commerce, Guangzhou 511363, China"],"raw_orcid":"https://orcid.org/0000-0002-8860-951X","affiliations":[{"raw_affiliation_string":"School of Information Technology and Engineering, Guangzhou College of Commerce, Guangzhou 511363, China","institution_ids":["https://openalex.org/I4210106134","https://openalex.org/I4400600917"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022844590","display_name":"Wushouer Silamu","orcid":"https://orcid.org/0009-0006-7944-1889"},"institutions":[{"id":"https://openalex.org/I96908189","display_name":"Xinjiang University","ror":"https://ror.org/059gw8r13","country_code":"CN","type":"education","lineage":["https://openalex.org/I96908189"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wushour Silamu","raw_affiliation_strings":["Key Multi-Lingual Laboratory of Xinjiang, Urumqi 830046, China","School of Information Science and Engineering, Xinjiang University, Urumqi 830046, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Key Multi-Lingual Laboratory of Xinjiang, Urumqi 830046, China","institution_ids":[]},{"raw_affiliation_string":"School of Information Science and Engineering, Xinjiang University, Urumqi 830046, China","institution_ids":["https://openalex.org/I96908189"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5113223789","display_name":"Minghui Qiu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210106134","display_name":"Guangzhou Vocational College of Science and Technology","ror":"https://ror.org/01dan7p53","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210106134"]},{"id":"https://openalex.org/I4400600917","display_name":"Guangzhou College of Commerce","ror":"https://ror.org/04f0j5d06","country_code":null,"type":"education","lineage":["https://openalex.org/I4400600917"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Minghui Qiu","raw_affiliation_strings":["School of Information Technology and Engineering, Guangzhou College of Commerce, Guangzhou 511363, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Information Technology and Engineering, Guangzhou College of Commerce, Guangzhou 511363, China","institution_ids":["https://openalex.org/I4210106134","https://openalex.org/I4400600917"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5114588364"],"corresponding_institution_ids":["https://openalex.org/I4210106134","https://openalex.org/I4400600917"],"apc_list":{"value":1600,"currency":"CHF","value_usd":1732},"apc_paid":{"value":1600,"currency":"CHF","value_usd":1732},"fwci":0.6623,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.76090358,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":"9","issue":"11","first_page":"134","last_page":"134"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.978600025177002,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.9672999978065491,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.8516967296600342},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6724814772605896},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5788518190383911},{"id":"https://openalex.org/keywords/language-identification","display_name":"Language identification","score":0.4406862258911133},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4140024781227112},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.1466158628463745}],"concepts":[{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.8516967296600342},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6724814772605896},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5788518190383911},{"id":"https://openalex.org/C129792486","wikidata":"https://www.wikidata.org/wiki/Q1050419","display_name":"Language identification","level":3,"score":0.4406862258911133},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4140024781227112},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.1466158628463745},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.3390/data9110134","is_oa":true,"landing_page_url":"https://doi.org/10.3390/data9110134","pdf_url":null,"source":{"id":"https://openalex.org/S4210226510","display_name":"Data","issn_l":"2306-5729","issn":["2306-5729"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data","raw_type":"journal-article"},{"id":"pmh:oai:RePEc:gam:jdataj:v:9:y:2024:i:11:p:134-:d:1518130","is_oa":false,"landing_page_url":"https://www.mdpi.com/2306-5729/9/11/134/","pdf_url":null,"source":{"id":"https://openalex.org/S4306401271","display_name":"RePEc: Research Papers in Economics","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I77793887","host_organization_name":"Federal Reserve Bank of St. Louis","host_organization_lineage":["https://openalex.org/I77793887"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},{"id":"pmh:oai:doaj.org/article:ed204bdd78684d8d9ade9540573af4b6","is_oa":true,"landing_page_url":"https://doaj.org/article/ed204bdd78684d8d9ade9540573af4b6","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Data, Vol 9, Iss 11, p 134 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/data9110134","is_oa":true,"landing_page_url":"https://doi.org/10.3390/data9110134","pdf_url":null,"source":{"id":"https://openalex.org/S4210226510","display_name":"Data","issn_l":"2306-5729","issn":["2306-5729"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.75,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G2204016764","display_name":null,"funder_award_id":"22GYB159","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4896784468","display_name":null,"funder_award_id":"62137002","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7303037936","display_name":null,"funder_award_id":"GJGJZD20210408092806017","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W92274471","https://openalex.org/W244375653","https://openalex.org/W2122637965","https://openalex.org/W2145895572","https://openalex.org/W2164394510","https://openalex.org/W2183047366","https://openalex.org/W2197073048","https://openalex.org/W2222932682","https://openalex.org/W2251452598","https://openalex.org/W2620806258","https://openalex.org/W2757188127","https://openalex.org/W2795267607","https://openalex.org/W2962937786","https://openalex.org/W3154338429","https://openalex.org/W6640788013","https://openalex.org/W6684320781","https://openalex.org/W6688975865","https://openalex.org/W6691408520"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W3204019825","https://openalex.org/W3213549959","https://openalex.org/W3108387573","https://openalex.org/W2924380321","https://openalex.org/W3082797515","https://openalex.org/W2271356425","https://openalex.org/W4383616786"],"abstract_inverted_index":{"Script":[0],"identification":[1,11,25,30,49,64,72,125,142,151,158],"is":[2,13,160],"easier":[3],"to":[4,45,162],"implement":[5],"than":[6],"language":[7,24,48,71,150],"identification,":[8],"and":[9,42,60,66],"its":[10],"rate":[12,31],"very":[14],"high.":[15],"The":[16,78],"fewer":[17],"languages":[18,41],"are":[19,96,110],"identified":[20],"when":[21,147],"using":[22],"a":[23,62,70,122,156],"algorithm,":[26],"the":[27,29,67,91,104,132,149],"higher":[28],"is.":[32],"However,":[33],"no":[34],"systematic":[35],"study":[36],"on":[37,75,117,128],"SI":[38],"involving":[39],"multiple":[40],"determining":[43],"how":[44],"construct":[46],"relevant":[47],"datasets":[50],"has":[51],"been":[52],"conducted.":[53],"Therefore,":[54],"in":[55,81,139,167],"this":[56,82,118],"paper,":[57],"we":[58,120],"discuss":[59],"design":[61],"script":[63,76,101,124,141,157],"algorithm":[65,126,159],"construction":[68],"of":[69,135],"dataset":[73,152],"based":[74,127],"groups.":[77,102],"data":[79],"sources":[80],"paper":[83],"comprise":[84],"261":[85],"different":[86,100,108,113],"languages\u2019":[87],"text":[88],"corpora":[89],"from":[90],"Leipzig":[92],"Corpora":[93],"Collection,":[94],"which":[95,136],"grouped":[97],"into":[98,112],"23":[99],"In":[103],"Unicode":[105],"encoding":[106],"scheme,":[107],"scripts":[109],"arranged":[111],"code":[114],"regions.":[115],"Based":[116],"feature,":[119],"propose":[121],"written":[123],"regular":[129],"expression":[130],"matching,":[131],"micro":[133],"F-score":[134],"reaches":[137],"0.9929":[138],"sentence-level":[140],"experiments.":[143],"To":[144],"reduce":[145],"noise":[146],"constructing":[148],"for":[153],"each":[154,168],"script,":[155],"used":[161],"filter":[163],"out":[164],"other-script":[165],"content":[166],"text.":[169]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
