{"id":"https://openalex.org/W4415240067","doi":"https://doi.org/10.48550/arxiv.2508.14586","title":"Filling the Gap for Uzbek: Creating Translation Resources for Southern Uzbek","display_name":"Filling the Gap for Uzbek: Creating Translation Resources for Southern Uzbek","publication_year":2025,"publication_date":"2025-08-20","ids":{"openalex":"https://openalex.org/W4415240067","doi":"https://doi.org/10.48550/arxiv.2508.14586"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2508.14586","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2508.14586","pdf_url":"https://arxiv.org/pdf/2508.14586","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2508.14586","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5085587044","display_name":"Mukhammadsaid Mamasaidov","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Mamasaidov, Mukhammadsaid","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120021839","display_name":"Azizullah Aral","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Aral, Azizullah","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114341707","display_name":"Abror Shopulatov","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shopulatov, Abror","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5120021840","display_name":"Mironshoh Inomjonov","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Inomjonov, Mironshoh","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5085587044"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T14417","display_name":"Central Asia Education and Culture","score":0.8992000222206116,"subfield":{"id":"https://openalex.org/subfields/3320","display_name":"Political Science and International Relations"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T14417","display_name":"Central Asia Education and Culture","score":0.8992000222206116,"subfield":{"id":"https://openalex.org/subfields/3320","display_name":"Political Science and International Relations"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13958","display_name":"Education, Innovation and Language Studies","score":0.8025000095367432,"subfield":{"id":"https://openalex.org/subfields/3304","display_name":"Education"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13547","display_name":"Economic and Industrial Development","score":0.7820000052452087,"subfield":{"id":"https://openalex.org/subfields/2002","display_name":"Economics and Econometrics"},"field":{"id":"https://openalex.org/fields/20","display_name":"Economics, Econometrics and Finance"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/uzbek","display_name":"Uzbek","score":0.9976999759674072},{"id":"https://openalex.org/keywords/southeast-asia","display_name":"Southeast asia","score":0.4812999963760376},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.47519999742507935},{"id":"https://openalex.org/keywords/work","display_name":"Work (physics)","score":0.38679999113082886},{"id":"https://openalex.org/keywords/natural","display_name":"Natural (archaeology)","score":0.33719998598098755},{"id":"https://openalex.org/keywords/centenarian","display_name":"Centenarian","score":0.3000999987125397}],"concepts":[{"id":"https://openalex.org/C2778718127","wikidata":"https://www.wikidata.org/wiki/Q9264","display_name":"Uzbek","level":2,"score":0.9976999759674072},{"id":"https://openalex.org/C3019398675","wikidata":"https://www.wikidata.org/wiki/Q11708","display_name":"Southeast asia","level":2,"score":0.4812999963760376},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.47519999742507935},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.42289999127388},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.40389999747276306},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.38679999113082886},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.33719998598098755},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.32510000467300415},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.31709998846054077},{"id":"https://openalex.org/C2779403534","wikidata":"https://www.wikidata.org/wiki/Q2944360","display_name":"Centenarian","level":3,"score":0.3000999987125397},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.2994999885559082},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.28049999475479126},{"id":"https://openalex.org/C3017485454","wikidata":"https://www.wikidata.org/wiki/Q27275","display_name":"Central asia","level":2,"score":0.2800000011920929},{"id":"https://openalex.org/C29985473","wikidata":"https://www.wikidata.org/wiki/Q188460","display_name":"Natural resource","level":2,"score":0.25940001010894775}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2508.14586","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2508.14586","pdf_url":"https://arxiv.org/pdf/2508.14586","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2508.14586","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2508.14586","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2508.14586","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2508.14586","pdf_url":"https://arxiv.org/pdf/2508.14586","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4415240067.pdf","grobid_xml":"https://content.openalex.org/works/W4415240067.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Southern":[0,34,47,102],"Uzbek":[1,21,35,48,103],"(uzs)":[2],"is":[3,36],"a":[4,52,67,75],"Turkic":[5],"language":[6,40],"variety":[7],"spoken":[8],"by":[9],"around":[10],"5":[11],"million":[12],"people":[13],"in":[14,23,38],"Afghanistan":[15],"and":[16,26,63,66,92,104],"differs":[17],"significantly":[18],"from":[19,60],"Northern":[20],"(uzn)":[22],"phonology,":[24],"lexicon,":[25],"orthography.":[27],"Despite":[28],"the":[29],"large":[30],"number":[31],"of":[32,86],"speakers,":[33],"underrepresented":[37],"natural":[39],"processing.":[41],"We":[42,72],"present":[43],"new":[44],"resources":[45],"for":[46,78],"machine":[49],"translation,":[50],"including":[51],"997-sentence":[53],"FLORES+":[54],"dev":[55],"set,":[56],"39,994":[57],"parallel":[58],"sentences":[59],"dictionary,":[61],"literary,":[62],"web":[64],"sources,":[65],"fine-tuned":[68],"NLLB-200":[69],"model":[70],"(lutfiy).":[71],"also":[73],"propose":[74],"post-processing":[76],"method":[77],"restoring":[79],"Arabic-script":[80],"half-space":[81],"characters,":[82],"which":[83],"improves":[84],"handling":[85],"morphological":[87],"boundaries.":[88],"All":[89],"datasets,":[90],"models,":[91],"tools":[93],"are":[94],"released":[95],"publicly":[96],"to":[97],"support":[98],"future":[99],"work":[100],"on":[101],"other":[105],"low-resource":[106],"languages.":[107]},"counts_by_year":[],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-16T00:00:00"}
