{"id":"https://openalex.org/W4206707056","doi":"https://doi.org/10.1017/s1351324921000425","title":"UNLT: Urdu Natural Language Toolkit","display_name":"UNLT: Urdu Natural Language Toolkit","publication_year":2022,"publication_date":"2022-01-19","ids":{"openalex":"https://openalex.org/W4206707056","doi":"https://doi.org/10.1017/s1351324921000425"},"language":"en","primary_location":{"id":"doi:10.1017/s1351324921000425","is_oa":true,"landing_page_url":"https://doi.org/10.1017/s1351324921000425","pdf_url":"https://www.cambridge.org/core/services/aop-cambridge-core/content/view/66306F671F7CB1056A004F1A166E8E30/S1351324921000425a.pdf/div-class-title-unlt-urdu-natural-language-toolkit-div.pdf","source":{"id":"https://openalex.org/S18088403","display_name":"Natural Language Engineering","issn_l":"1351-3249","issn":["1351-3249","1469-8110"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Natural Language Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://www.cambridge.org/core/services/aop-cambridge-core/content/view/66306F671F7CB1056A004F1A166E8E30/S1351324921000425a.pdf/div-class-title-unlt-urdu-natural-language-toolkit-div.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5075488653","display_name":"Jawad Shafi","orcid":"https://orcid.org/0000-0001-6427-3823"},"institutions":[{"id":"https://openalex.org/I67415387","display_name":"Lancaster University","ror":"https://ror.org/04f2nsd36","country_code":"GB","type":"education","lineage":["https://openalex.org/I67415387"]},{"id":"https://openalex.org/I16076960","display_name":"COMSATS University Islamabad","ror":"https://ror.org/00nqqvk19","country_code":"PK","type":"education","lineage":["https://openalex.org/I16076960"]}],"countries":["GB","PK"],"is_corresponding":true,"raw_author_name":"Jawad Shafi","raw_affiliation_strings":["COMSATS University Islamabad, Lahore Campus, Pakistan","School of Computer and Communication (SCC), Lancaster University, Lancaster, UK"],"affiliations":[{"raw_affiliation_string":"COMSATS University Islamabad, Lahore Campus, Pakistan","institution_ids":["https://openalex.org/I16076960"]},{"raw_affiliation_string":"School of Computer and Communication (SCC), Lancaster University, Lancaster, UK","institution_ids":["https://openalex.org/I67415387"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032024249","display_name":"Hafiz Rizwan Iqbal","orcid":"https://orcid.org/0000-0002-2304-8459"},"institutions":[{"id":"https://openalex.org/I1323252656","display_name":"Information Technology University","ror":"https://ror.org/00ngv8j44","country_code":"PK","type":"education","lineage":["https://openalex.org/I1323252656"]}],"countries":["PK"],"is_corresponding":false,"raw_author_name":"Hafiz Rizwan Iqbal","raw_affiliation_strings":["Information Technology University, Lahore, Pakistan"],"affiliations":[{"raw_affiliation_string":"Information Technology University, Lahore, Pakistan","institution_ids":["https://openalex.org/I1323252656"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005027058","display_name":"Rao Muhammad Adeel Nawab","orcid":"https://orcid.org/0000-0002-1765-8904"},"institutions":[{"id":"https://openalex.org/I16076960","display_name":"COMSATS University Islamabad","ror":"https://ror.org/00nqqvk19","country_code":"PK","type":"education","lineage":["https://openalex.org/I16076960"]}],"countries":["PK"],"is_corresponding":false,"raw_author_name":"Rao Muhammad Adeel Nawab","raw_affiliation_strings":["COMSATS University Islamabad, Lahore Campus, Pakistan"],"affiliations":[{"raw_affiliation_string":"COMSATS University Islamabad, Lahore Campus, Pakistan","institution_ids":["https://openalex.org/I16076960"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058785189","display_name":"Paul Rayson","orcid":"https://orcid.org/0000-0002-1257-2191"},"institutions":[{"id":"https://openalex.org/I67415387","display_name":"Lancaster University","ror":"https://ror.org/04f2nsd36","country_code":"GB","type":"education","lineage":["https://openalex.org/I67415387"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Paul Rayson","raw_affiliation_strings":["School of Computer and Communication (SCC), Lancaster University, Lancaster, UK"],"affiliations":[{"raw_affiliation_string":"School of Computer and Communication (SCC), Lancaster University, Lancaster, UK","institution_ids":["https://openalex.org/I67415387"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5075488653"],"corresponding_institution_ids":["https://openalex.org/I16076960","https://openalex.org/I67415387"],"apc_list":null,"apc_paid":null,"fwci":3.1931,"has_fulltext":true,"cited_by_count":23,"citation_normalized_percentile":{"value":0.92634192,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"29","issue":"4","first_page":"942","last_page":"977"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.9930999875068665,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9080484509468079},{"id":"https://openalex.org/keywords/lexical-analysis","display_name":"Lexical analysis","score":0.7868308424949646},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.7259869575500488},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7172461748123169},{"id":"https://openalex.org/keywords/urdu","display_name":"Urdu","score":0.6490315794944763},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.5307179689407349},{"id":"https://openalex.org/keywords/lemmatisation","display_name":"Lemmatisation","score":0.49889659881591797},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.41662928462028503},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.1266515552997589}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9080484509468079},{"id":"https://openalex.org/C176982825","wikidata":"https://www.wikidata.org/wiki/Q835922","display_name":"Lexical analysis","level":2,"score":0.7868308424949646},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.7259869575500488},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7172461748123169},{"id":"https://openalex.org/C2777350258","wikidata":"https://www.wikidata.org/wiki/Q1617","display_name":"Urdu","level":2,"score":0.6490315794944763},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.5307179689407349},{"id":"https://openalex.org/C161831844","wikidata":"https://www.wikidata.org/wiki/Q2554325","display_name":"Lemmatisation","level":2,"score":0.49889659881591797},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.41662928462028503},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.1266515552997589},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1017/s1351324921000425","is_oa":true,"landing_page_url":"https://doi.org/10.1017/s1351324921000425","pdf_url":"https://www.cambridge.org/core/services/aop-cambridge-core/content/view/66306F671F7CB1056A004F1A166E8E30/S1351324921000425a.pdf/div-class-title-unlt-urdu-natural-language-toolkit-div.pdf","source":{"id":"https://openalex.org/S18088403","display_name":"Natural Language Engineering","issn_l":"1351-3249","issn":["1351-3249","1469-8110"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Natural Language Engineering","raw_type":"journal-article"},{"id":"pmh:oai:eprints.lancs.ac.uk:165032","is_oa":true,"landing_page_url":null,"pdf_url":"https://eprints.lancs.ac.uk/id/eprint/165032/1/UNLT_Urdu_Natural_Language_Toolkit_V_15_November_2021.pdf","source":{"id":"https://openalex.org/S4306401916","display_name":"Lancaster EPrints (Lancaster University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67415387","host_organization_name":"Lancaster University","host_organization_lineage":["https://openalex.org/I67415387"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"PeerReviewed"}],"best_oa_location":{"id":"doi:10.1017/s1351324921000425","is_oa":true,"landing_page_url":"https://doi.org/10.1017/s1351324921000425","pdf_url":"https://www.cambridge.org/core/services/aop-cambridge-core/content/view/66306F671F7CB1056A004F1A166E8E30/S1351324921000425a.pdf/div-class-title-unlt-urdu-natural-language-toolkit-div.pdf","source":{"id":"https://openalex.org/S18088403","display_name":"Natural Language Engineering","issn_l":"1351-3249","issn":["1351-3249","1469-8110"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Natural Language Engineering","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.5199999809265137,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4206707056.pdf","grobid_xml":"https://content.openalex.org/works/W4206707056.grobid-xml"},"referenced_works_count":69,"referenced_works":["https://openalex.org/W143309896","https://openalex.org/W152203974","https://openalex.org/W182831726","https://openalex.org/W345642469","https://openalex.org/W1519945466","https://openalex.org/W1521626219","https://openalex.org/W1574901103","https://openalex.org/W1761993226","https://openalex.org/W1819641058","https://openalex.org/W1835136410","https://openalex.org/W1975879310","https://openalex.org/W1991133427","https://openalex.org/W2001334255","https://openalex.org/W2008056655","https://openalex.org/W2013623275","https://openalex.org/W2020448136","https://openalex.org/W2032585622","https://openalex.org/W2037789405","https://openalex.org/W2040298842","https://openalex.org/W2054460891","https://openalex.org/W2054533749","https://openalex.org/W2057560641","https://openalex.org/W2075517337","https://openalex.org/W2085177100","https://openalex.org/W2096797897","https://openalex.org/W2105989239","https://openalex.org/W2123442489","https://openalex.org/W2125838338","https://openalex.org/W2126905976","https://openalex.org/W2135843243","https://openalex.org/W2145514301","https://openalex.org/W2150349103","https://openalex.org/W2155280192","https://openalex.org/W2158195707","https://openalex.org/W2171774859","https://openalex.org/W2246244723","https://openalex.org/W2251264938","https://openalex.org/W2252067850","https://openalex.org/W2317438879","https://openalex.org/W2322283567","https://openalex.org/W2395660647","https://openalex.org/W2399720833","https://openalex.org/W2409439155","https://openalex.org/W2461981617","https://openalex.org/W2504059350","https://openalex.org/W2508023093","https://openalex.org/W2612929327","https://openalex.org/W2901223329","https://openalex.org/W2962965405","https://openalex.org/W2963250244","https://openalex.org/W2963979492","https://openalex.org/W3002035509","https://openalex.org/W3035390927","https://openalex.org/W3156159991","https://openalex.org/W4210469593","https://openalex.org/W4236722038","https://openalex.org/W4388317804","https://openalex.org/W6601566281","https://openalex.org/W6605572207","https://openalex.org/W6617544347","https://openalex.org/W6636672575","https://openalex.org/W6637852806","https://openalex.org/W6658647298","https://openalex.org/W6681828542","https://openalex.org/W6691492335","https://openalex.org/W6712390022","https://openalex.org/W6718931271","https://openalex.org/W6756184450","https://openalex.org/W6989886544"],"related_works":["https://openalex.org/W2574640638","https://openalex.org/W2202496758","https://openalex.org/W3089211180","https://openalex.org/W2973955309","https://openalex.org/W4308083916","https://openalex.org/W2969595679","https://openalex.org/W4382120354","https://openalex.org/W4387484878","https://openalex.org/W4206291365","https://openalex.org/W4211180823"],"abstract_inverted_index":{"Abstract":[0],"This":[1,73],"study":[2,74],"describes":[3],"a":[4,15,40,64,112,118,145,154],"Natural":[5,83],"Language":[6,84],"Processing":[7],"(NLP)":[8],"toolkit,":[9],"as":[10],"the":[11,61,76,80,131,167,199,219,228],"first":[12,77],"contribution":[13],"of":[14,60,67,79,156,201],"larger":[16],"project,":[17],"for":[18,31,43,50,94,130,139,204,243],"an":[19,95],"under-resourced":[20],"language\u2014Urdu.":[21],"In":[22,182],"previous":[23],"studies,":[24],"standard":[25,44,189],"NLP":[26,97],"toolkits":[27],"have":[28,185,217],"been":[29],"developed":[30,186],"English":[32],"and":[33,48,103,127,162,177,191,197,210,230,234,240],"many":[34],"other":[35],"languages.":[36],"There":[37],"is":[38,142,153],"also":[39],"dire":[41],"need":[42],"text":[45,69,90],"processing":[46,91],"tools":[47,92],"methods":[49],"Urdu,":[51],"despite":[52],"it":[53],"being":[54,70],"widely":[55],"spoken":[56],"in":[57],"different":[58],"parts":[59],"world":[62],"with":[63,117,125,222],"large":[65,187],"amount":[66],"digital":[68],"readily":[71],"available.":[72],"presents":[75],"version":[78],"UNLT":[81,108,150,168],"(Urdu":[82],"Toolkit)":[85],"which":[86],"contains":[87],"three":[88],"key":[89],"required":[93],"Urdu":[96,205],"pipeline;":[98],"word":[99,109,206],"tokenizer,":[100,102],"sentence":[101,151,208],"part-of-speech":[104],"(POS)":[105],"tagger.":[106],"The":[107,135,149],"tokenizer":[110,152],"employs":[111],"morpheme":[113],"matching":[114],"algorithm":[115],"coupled":[116],"state-of-the-art":[119],"stochastic":[120,180],"n":[121],"-gram":[122],"language":[123],"model":[124],"back-off":[126],"smoothing":[128],"characteristics":[129],"space":[132,136],"omission":[133],"problem.":[134],"insertion":[137],"problem":[138],"compound":[140],"words":[141],"tackled":[143],"using":[144],"dictionary":[146,163],"look-up":[147,164],"technique.":[148],"combination":[155],"various":[157],"machine":[158],"learning,":[159],"rule-based,":[160],"regular-expressions,":[161],"techniques.":[165,181],"Finally,":[166],"POS":[169,211],"taggers":[170],"are":[171,237],"based":[172],"on":[173],"Hidden":[174],"Markov":[175],"Model":[176],"Maximum":[178],"Entropy-based":[179],"addition,":[183],"we":[184,216],"gold":[188],"training":[190,229],"testing":[192,231],"data":[193,232],"sets":[194],"to":[195],"improve":[196],"evaluate":[198],"performance":[200],"new":[202],"techniques":[203],"tokenization,":[207,209],"tagging.":[212],"For":[213],"comparison":[214],"purposes,":[215],"compared":[218],"proposed":[220,226],"approaches":[221],"several":[223],"methods.":[224],"Our":[225],"UNLT,":[227],"sets,":[233],"supporting":[235],"resources":[236],"all":[238],"free":[239],"publicly":[241],"available":[242],"academic":[244],"use.":[245]},"counts_by_year":[{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":9},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":2}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
