{"id":"https://openalex.org/W4404787914","doi":"https://doi.org/10.1109/access.2024.3507382","title":"Enhancing Sindhi Word Segmentation Using Subword Representation Learning and Position-Aware Self-Attention","display_name":"Enhancing Sindhi Word Segmentation Using Subword Representation Learning and Position-Aware Self-Attention","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4404787914","doi":"https://doi.org/10.1109/access.2024.3507382"},"language":"en","primary_location":{"id":"doi:10.1109/access.2024.3507382","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2024.3507382","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1109/access.2024.3507382","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103165158","display_name":"Wazir Ali","orcid":"https://orcid.org/0000-0002-9392-459X"},"institutions":[{"id":"https://openalex.org/I4210150297","display_name":"Institute of Business Management","ror":"https://ror.org/048w4c951","country_code":"PK","type":"education","lineage":["https://openalex.org/I4210150297"]}],"countries":["PK"],"is_corresponding":true,"raw_author_name":"Wazir Ali","raw_affiliation_strings":["College of Computer Science and Information Systems, Institute of Business Management, Karachi, Pakistan"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Information Systems, Institute of Business Management, Karachi, Pakistan","institution_ids":["https://openalex.org/I4210150297"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016123311","display_name":"Jay Kumar","orcid":"https://orcid.org/0000-0003-4915-9701"},"institutions":[{"id":"https://openalex.org/I129902397","display_name":"Dalhousie University","ror":"https://ror.org/01e6qks80","country_code":"CA","type":"education","lineage":["https://openalex.org/I129902397"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Jay Kumar","raw_affiliation_strings":["Faculty of Computer Science, Dalhousie University, Halifax, NS, Canada"],"affiliations":[{"raw_affiliation_string":"Faculty of Computer Science, Dalhousie University, Halifax, NS, Canada","institution_ids":["https://openalex.org/I129902397"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088667335","display_name":"Saifullah Tumrani","orcid":"https://orcid.org/0000-0002-6221-3488"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Saifullah Tumani","raw_affiliation_strings":["Hessianflux O&#x00DC;, Tallinn, Estonia"],"affiliations":[{"raw_affiliation_string":"Hessianflux O&#x00DC;, Tallinn, Estonia","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044384118","display_name":"Redhwan Nour","orcid":"https://orcid.org/0000-0002-6030-1505"},"institutions":[{"id":"https://openalex.org/I23075662","display_name":"Taibah University","ror":"https://ror.org/01xv1nn60","country_code":"SA","type":"education","lineage":["https://openalex.org/I23075662"]}],"countries":["SA"],"is_corresponding":false,"raw_author_name":"Redhwan Nour","raw_affiliation_strings":["Department of Computer Science, College of Computer Science and Engineering, Taibah University, Medina, Saudi Arabia"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, College of Computer Science and Engineering, Taibah University, Medina, Saudi Arabia","institution_ids":["https://openalex.org/I23075662"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052495689","display_name":"Adeeb Noor","orcid":"https://orcid.org/0000-0002-8251-1853"},"institutions":[{"id":"https://openalex.org/I185163786","display_name":"King Abdulaziz University","ror":"https://ror.org/02ma4wv74","country_code":"SA","type":"education","lineage":["https://openalex.org/I185163786"]}],"countries":["SA"],"is_corresponding":false,"raw_author_name":"Adeeb Noor","raw_affiliation_strings":["Department of Information Technology, Faculty of Computing and Information Technology, King Abdulaziz University, Jeddah, Saudi Arabia"],"affiliations":[{"raw_affiliation_string":"Department of Information Technology, Faculty of Computing and Information Technology, King Abdulaziz University, Jeddah, Saudi Arabia","institution_ids":["https://openalex.org/I185163786"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5051227924","display_name":"Zenglin Xu","orcid":"https://orcid.org/0000-0001-5550-6461"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zenglin Xu","raw_affiliation_strings":["School of Computer Science and Technology, Harbin Institute of Technology, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5103165158"],"corresponding_institution_ids":["https://openalex.org/I4210150297"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.19139835,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"12","issue":null,"first_page":"183133","last_page":"183142"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9768000245094299,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7449855208396912},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6170135736465454},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.6018392443656921},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5822486281394958},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.581567645072937},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5573074817657471},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5100997090339661},{"id":"https://openalex.org/keywords/text-segmentation","display_name":"Text segmentation","score":0.49628812074661255},{"id":"https://openalex.org/keywords/position","display_name":"Position (finance)","score":0.46041637659072876},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.18440935015678406}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7449855208396912},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6170135736465454},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.6018392443656921},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5822486281394958},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.581567645072937},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5573074817657471},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5100997090339661},{"id":"https://openalex.org/C98501671","wikidata":"https://www.wikidata.org/wiki/Q1948408","display_name":"Text segmentation","level":3,"score":0.49628812074661255},{"id":"https://openalex.org/C198082294","wikidata":"https://www.wikidata.org/wiki/Q3399648","display_name":"Position (finance)","level":2,"score":0.46041637659072876},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.18440935015678406},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C10138342","wikidata":"https://www.wikidata.org/wiki/Q43015","display_name":"Finance","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2024.3507382","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2024.3507382","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:134b0d78b2944c8a90af182de293903b","is_oa":true,"landing_page_url":"https://doaj.org/article/134b0d78b2944c8a90af182de293903b","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 12, Pp 183133-183142 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2024.3507382","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2024.3507382","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G267898187","display_name":null,"funder_award_id":"2018YFB1005100","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G3898052904","display_name":null,"funder_award_id":"2018YFB1005104","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":57,"referenced_works":["https://openalex.org/W1508176368","https://openalex.org/W1940872118","https://openalex.org/W2064675550","https://openalex.org/W2131774270","https://openalex.org/W2144639898","https://openalex.org/W2250739653","https://openalex.org/W2274880506","https://openalex.org/W2398978578","https://openalex.org/W2423157849","https://openalex.org/W2471408330","https://openalex.org/W2493916176","https://openalex.org/W2514307064","https://openalex.org/W2587051312","https://openalex.org/W2611455120","https://openalex.org/W2801805865","https://openalex.org/W2808452747","https://openalex.org/W2890459330","https://openalex.org/W2900613036","https://openalex.org/W2912165583","https://openalex.org/W2950953300","https://openalex.org/W2962885853","https://openalex.org/W2962902328","https://openalex.org/W2963233387","https://openalex.org/W2963572611","https://openalex.org/W2964093505","https://openalex.org/W3008533347","https://openalex.org/W3096005429","https://openalex.org/W3099527960","https://openalex.org/W3104309049","https://openalex.org/W3113915881","https://openalex.org/W3210203631","https://openalex.org/W4287660544","https://openalex.org/W4294170691","https://openalex.org/W4304190326","https://openalex.org/W4319041195","https://openalex.org/W4383556271","https://openalex.org/W4384666235","https://openalex.org/W4385245566","https://openalex.org/W4387428045","https://openalex.org/W4390590487","https://openalex.org/W4392199349","https://openalex.org/W4396523448","https://openalex.org/W4396690825","https://openalex.org/W6631190155","https://openalex.org/W6640362995","https://openalex.org/W6681100366","https://openalex.org/W6682082992","https://openalex.org/W6689370836","https://openalex.org/W6713134421","https://openalex.org/W6733484550","https://openalex.org/W6744142555","https://openalex.org/W6744958764","https://openalex.org/W6752788575","https://openalex.org/W6753568494","https://openalex.org/W6754275386","https://openalex.org/W6755183338","https://openalex.org/W6787045683"],"related_works":["https://openalex.org/W4392255232","https://openalex.org/W2978383222","https://openalex.org/W2172629291","https://openalex.org/W2380773642","https://openalex.org/W2384559435","https://openalex.org/W2337707338","https://openalex.org/W2393940967","https://openalex.org/W2785359773","https://openalex.org/W2159591557","https://openalex.org/W2058548953"],"abstract_inverted_index":{"Sindhi":[0,15,39,144],"word":[1,36,40,84,104,145],"segmentation":[2,41,105,146],"is":[3],"a":[4,96,107,119,128],"challenging":[5],"task":[6],"due":[7],"to":[8,19],"space":[9],"omission":[10],"and":[11,24,31,46,66,127],"insertion":[12],"issues.":[13],"The":[14,111],"language":[16],"itself":[17],"adds":[18],"this":[20,92],"complexity.":[21],"It\u2019s":[22],"cursive":[23],"consists":[25],"of":[26,35,71],"characters":[27],"with":[28,68],"inherent":[29],"joining":[30],"non-joining":[32],"properties,":[33],"independent":[34],"boundaries.":[37],"Existing":[38],"methods":[42,52],"rely":[43],"on":[44,147],"designing":[45],"combining":[47],"hand-crafted":[48],"features.":[49],"However,":[50],"these":[51],"have":[53],"limitations,":[54],"such":[55],"as":[56,106],"difficulty":[57],"handling":[58],"out-of-vocabulary":[59],"words,":[60],"limited":[61],"robustness":[62],"for":[63],"other":[64],"languages,":[65],"inefficiency":[67],"large":[69],"amounts":[70],"noisy":[72],"or":[73],"raw":[74],"text.":[75],"Neural":[76,98],"network-based":[77],"models,":[78],"in":[79,143],"contrast,":[80],"can":[81],"automatically":[82],"capture":[83],"boundary":[85],"information":[86],"without":[87],"requiring":[88],"prior":[89],"knowledge.":[90],"In":[91],"paper,":[93],"we":[94],"propose":[95],"Subword-Guided":[97],"Word":[99],"Segmenter":[100],"(SGNWS)":[101],"that":[102,136],"addresses":[103],"sequence":[108],"labeling":[109],"task.":[110],"SGNWS":[112,138],"model":[113,139],"incorporates":[114],"subword":[115],"representation":[116],"learning":[117],"through":[118],"bidirectional":[120],"long":[121],"short-term":[122],"memory":[123],"encoder,":[124],"position-aware":[125],"self-attention,":[126],"conditional":[129],"random":[130],"field.":[131],"Our":[132],"empirical":[133],"results":[134],"demonstrate":[135],"the":[137],"achieves":[140],"state-of-the-art":[141],"performance":[142],"six":[148],"datasets.":[149]},"counts_by_year":[],"updated_date":"2026-04-16T15:07:20.185449","created_date":"2025-10-10T00:00:00"}
