{"id":"https://openalex.org/W4408821677","doi":"https://doi.org/10.3390/data10040043","title":"Improved Script Identification Algorithm Using Unicode-Based Regular Expression Matching Strategy","display_name":"Improved Script Identification Algorithm Using Unicode-Based Regular Expression Matching Strategy","publication_year":2025,"publication_date":"2025-03-25","ids":{"openalex":"https://openalex.org/W4408821677","doi":"https://doi.org/10.3390/data10040043"},"language":"en","primary_location":{"id":"doi:10.3390/data10040043","is_oa":true,"landing_page_url":"https://doi.org/10.3390/data10040043","pdf_url":"https://www.mdpi.com/2306-5729/10/4/43/pdf?version=1742900670","source":{"id":"https://openalex.org/S4210226510","display_name":"Data","issn_l":"2306-5729","issn":["2306-5729"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2306-5729/10/4/43/pdf?version=1742900670","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114588364","display_name":"Mamtimin Qasim","orcid":"https://orcid.org/0000-0002-8860-951X"},"institutions":[{"id":"https://openalex.org/I4210106134","display_name":"Guangzhou Vocational College of Science and Technology","ror":"https://ror.org/01dan7p53","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210106134"]},{"id":"https://openalex.org/I4400600917","display_name":"Guangzhou College of Commerce","ror":"https://ror.org/04f0j5d06","country_code":null,"type":"education","lineage":["https://openalex.org/I4400600917"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Mamtimin Qasim","raw_affiliation_strings":["School of Information Technology and Engineering, Guangzhou College of Commerce, Guangzhou 511363, China"],"raw_orcid":"https://orcid.org/0000-0002-8860-951X","affiliations":[{"raw_affiliation_string":"School of Information Technology and Engineering, Guangzhou College of Commerce, Guangzhou 511363, China","institution_ids":["https://openalex.org/I4210106134","https://openalex.org/I4400600917"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5022844590","display_name":"Wushouer Silamu","orcid":"https://orcid.org/0009-0006-7944-1889"},"institutions":[{"id":"https://openalex.org/I96908189","display_name":"Xinjiang University","ror":"https://ror.org/059gw8r13","country_code":"CN","type":"education","lineage":["https://openalex.org/I96908189"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wushour Silamu","raw_affiliation_strings":["Key Multi-Lingual Laboratory of Xinjiang, Urumqi 830046, China","School of Computer Science and Technology, Xinjiang University, Urumqi 830046, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Key Multi-Lingual Laboratory of Xinjiang, Urumqi 830046, China","institution_ids":[]},{"raw_affiliation_string":"School of Computer Science and Technology, Xinjiang University, Urumqi 830046, China","institution_ids":["https://openalex.org/I96908189"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5114588364"],"corresponding_institution_ids":["https://openalex.org/I4210106134","https://openalex.org/I4400600917"],"apc_list":{"value":1600,"currency":"CHF","value_usd":1732},"apc_paid":{"value":1600,"currency":"CHF","value_usd":1732},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0237535,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"10","issue":"4","first_page":"43","last_page":"43"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9832000136375427,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9832000136375427,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9830999970436096,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9484999775886536,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/unicode","display_name":"Unicode","score":0.9083136320114136},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.6461144089698792},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6204841732978821},{"id":"https://openalex.org/keywords/regular-expression","display_name":"Regular expression","score":0.5597897171974182},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5557095408439636},{"id":"https://openalex.org/keywords/expression","display_name":"Expression (computer science)","score":0.5466483235359192},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.5177644491195679},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2794041335582733},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.26677921414375305},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.21304216980934143},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.06700992584228516},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.06676694750785828}],"concepts":[{"id":"https://openalex.org/C500551929","wikidata":"https://www.wikidata.org/wiki/Q8819","display_name":"Unicode","level":2,"score":0.9083136320114136},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.6461144089698792},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6204841732978821},{"id":"https://openalex.org/C121329065","wikidata":"https://www.wikidata.org/wiki/Q185612","display_name":"Regular expression","level":2,"score":0.5597897171974182},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.5557095408439636},{"id":"https://openalex.org/C90559484","wikidata":"https://www.wikidata.org/wiki/Q778379","display_name":"Expression (computer science)","level":2,"score":0.5466483235359192},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5177644491195679},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2794041335582733},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.26677921414375305},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.21304216980934143},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.06700992584228516},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.06676694750785828},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3390/data10040043","is_oa":true,"landing_page_url":"https://doi.org/10.3390/data10040043","pdf_url":"https://www.mdpi.com/2306-5729/10/4/43/pdf?version=1742900670","source":{"id":"https://openalex.org/S4210226510","display_name":"Data","issn_l":"2306-5729","issn":["2306-5729"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:6e1a83de8b764f5ebfc6346342f9293f","is_oa":true,"landing_page_url":"https://doaj.org/article/6e1a83de8b764f5ebfc6346342f9293f","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Data, Vol 10, Iss 4, p 43 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/data10040043","is_oa":true,"landing_page_url":"https://doi.org/10.3390/data10040043","pdf_url":"https://www.mdpi.com/2306-5729/10/4/43/pdf?version=1742900670","source":{"id":"https://openalex.org/S4210226510","display_name":"Data","issn_l":"2306-5729","issn":["2306-5729"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1642130822","display_name":null,"funder_award_id":"24YJCZH142","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4896784468","display_name":null,"funder_award_id":"62137002","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7303037936","display_name":null,"funder_award_id":"GJGJZD20210408092806017","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4408821677.pdf"},"referenced_works_count":12,"referenced_works":["https://openalex.org/W2122637965","https://openalex.org/W2145895572","https://openalex.org/W2164394510","https://openalex.org/W2183047366","https://openalex.org/W2222932682","https://openalex.org/W2620806258","https://openalex.org/W2795267607","https://openalex.org/W2962937786","https://openalex.org/W3154338429","https://openalex.org/W4404225537","https://openalex.org/W6684320781","https://openalex.org/W6688975865"],"related_works":["https://openalex.org/W1953677193","https://openalex.org/W3113362864","https://openalex.org/W2404000518","https://openalex.org/W589182691","https://openalex.org/W1993328117","https://openalex.org/W2358376734","https://openalex.org/W4288900686","https://openalex.org/W2899274982","https://openalex.org/W3123905808","https://openalex.org/W127410472"],"abstract_inverted_index":{"While":[0],"script":[1,22,39,53,87,97,108,117,135,148,169,182,208],"identification":[2,23,54,88,136,183,209],"is":[3,19,90,153],"the":[4,32,92,103,112,120,126,175,181,187,190,198],"first":[5],"step":[6],"in":[7,44,47,174,180],"many":[8],"natural":[9],"language":[10],"processing":[11,81],"and":[12,40,66,105,124,155],"text":[13,80,176],"mining":[14],"tasks,":[15],"at":[16],"present,":[17],"there":[18],"no":[20],"open-source":[21],"algorithm":[24,200,210],"for":[25,95],"text.":[26],"For":[27],"this":[28,45,139],"reason,":[29],"we":[30,110,141],"analyze":[31,111],"Unicode":[33,122,147],"encoding":[34,113],"of":[35,38,107,115,189],"each":[36,96,116],"type":[37],"construct":[41],"regular":[42,93],"expressions":[43],"study,":[46],"order":[48],"to":[49,64,131,165],"design":[50,132],"an":[51,133],"improved":[52,134],"algorithm.":[55,137],"Because":[56],"some":[57,72],"scripts":[58,74],"share":[59],"common":[60],"characters,":[61,128],"it\u2019s":[62],"impossible":[63],"count":[65],"summarize":[67],"them.":[68],"As":[69],"a":[70,85],"result,":[71],"extracted":[73],"are":[75,177],"incomplete,":[76],"which":[77],"affects":[78],"subsequent":[79],"tasks;":[82],"furthermore,":[83,171],"if":[84],"new":[86],"feature":[89],"required,":[91],"expression":[94],"must":[98],"be":[99,166],"re-adjusted.":[100],"To":[101],"improve":[102],"performance":[104],"scalability":[106],"identification,":[109],"range":[114],"provided":[118,191],"on":[119,214],"official":[121],"website":[123],"identify":[125],"shared":[127],"allowing":[129],"us":[130],"Using":[138],"approach,":[140],"can":[142],"fully":[143],"consider":[144],"all":[145],"169":[146],"types.":[149],"The":[150,193],"proposed":[151,199],"method":[152],"scalable":[154],"does":[156],"not":[157],"require":[158],"numbers,":[159],"punctuation":[160],"marks,":[161],"or":[162],"other":[163],"symbols":[164],"filtered":[167],"during":[168],"identification;":[170],"these":[172],"items":[173],"also":[178],"included":[179],"results,":[184],"thus":[185],"ensuring":[186],"integrity":[188],"information.":[192],"experimental":[194],"results":[195],"show":[196],"that":[197],"performs":[201],"almost":[202],"as":[203,205],"well":[204],"our":[206],"previous":[207],"while":[211],"providing":[212],"improvements":[213],"its":[215],"basis.":[216]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
