{"id":"https://openalex.org/W7127153790","doi":"https://doi.org/10.3390/data11020028","title":"Dual-Source Synthetic Uzbek Corpora for Sentiment Analysis and NER with Controlled Emoji Signals","display_name":"Dual-Source Synthetic Uzbek Corpora for Sentiment Analysis and NER with Controlled Emoji Signals","publication_year":2026,"publication_date":"2026-02-01","ids":{"openalex":"https://openalex.org/W7127153790","doi":"https://doi.org/10.3390/data11020028"},"language":"en","primary_location":{"id":"doi:10.3390/data11020028","is_oa":true,"landing_page_url":"https://doi.org/10.3390/data11020028","pdf_url":null,"source":{"id":"https://openalex.org/S4210226510","display_name":"Data","issn_l":"2306-5729","issn":["2306-5729"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.3390/data11020028","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5121549953","display_name":"Bobur Rashidovich Saidov","orcid":null},"institutions":[{"id":"https://openalex.org/I188973947","display_name":"Novosibirsk State University","ror":"https://ror.org/04t2ss102","country_code":"RU","type":"education","lineage":["https://openalex.org/I188973947"]}],"countries":["RU"],"is_corresponding":true,"raw_author_name":"Bobur Saidov","raw_affiliation_strings":["Faculty of Mechanics and Mathematics, Novosibirsk State University, 1 Pirogova str., Novosibirsk 630090, Russia"],"affiliations":[{"raw_affiliation_string":"Faculty of Mechanics and Mathematics, Novosibirsk State University, 1 Pirogova str., Novosibirsk 630090, Russia","institution_ids":["https://openalex.org/I188973947"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121598982","display_name":"Vladimir Borisovich Barakhnin","orcid":null},"institutions":[{"id":"https://openalex.org/I188973947","display_name":"Novosibirsk State University","ror":"https://ror.org/04t2ss102","country_code":"RU","type":"education","lineage":["https://openalex.org/I188973947"]},{"id":"https://openalex.org/I4210137651","display_name":"Federal Research Center for Information and Computational Technologies","ror":"https://ror.org/02x3mf211","country_code":"RU","type":"facility","lineage":["https://openalex.org/I1313323035","https://openalex.org/I4210097085","https://openalex.org/I4210137651"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Vladimir Barakhnin","raw_affiliation_strings":["Faculty of Mechanics and Mathematics, Novosibirsk State University, 1 Pirogova str., Novosibirsk 630090, Russia","Federal Research Center for Information and Computational Technologies, Novosibirsk 630090, Russia"],"affiliations":[{"raw_affiliation_string":"Faculty of Mechanics and Mathematics, Novosibirsk State University, 1 Pirogova str., Novosibirsk 630090, Russia","institution_ids":["https://openalex.org/I188973947"]},{"raw_affiliation_string":"Federal Research Center for Information and Computational Technologies, Novosibirsk 630090, Russia","institution_ids":["https://openalex.org/I4210137651"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114850099","display_name":"Shohrux Madirimov","orcid":null},"institutions":[{"id":"https://openalex.org/I188973947","display_name":"Novosibirsk State University","ror":"https://ror.org/04t2ss102","country_code":"RU","type":"education","lineage":["https://openalex.org/I188973947"]},{"id":"https://openalex.org/I4210112770","display_name":"Tashkent Institute of Textile and Light Industry","ror":"https://ror.org/01kt71615","country_code":"UZ","type":"education","lineage":["https://openalex.org/I4210112770"]}],"countries":["RU","UZ"],"is_corresponding":false,"raw_author_name":"Shohrux Madirimov","raw_affiliation_strings":["Faculty of Mechanics and Mathematics, Novosibirsk State University, 1 Pirogova str., Novosibirsk 630090, Russia","Tashkent Institute of Textile and Light Industry, Tashkent 100100, Uzbekistan"],"affiliations":[{"raw_affiliation_string":"Faculty of Mechanics and Mathematics, Novosibirsk State University, 1 Pirogova str., Novosibirsk 630090, Russia","institution_ids":["https://openalex.org/I188973947"]},{"raw_affiliation_string":"Tashkent Institute of Textile and Light Industry, Tashkent 100100, Uzbekistan","institution_ids":["https://openalex.org/I4210112770"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120847850","display_name":"Umid Ibragimov","orcid":null},"institutions":[{"id":"https://openalex.org/I188973947","display_name":"Novosibirsk State University","ror":"https://ror.org/04t2ss102","country_code":"RU","type":"education","lineage":["https://openalex.org/I188973947"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Umid Ibragimov","raw_affiliation_strings":["Faculty of Mechanics and Mathematics, Novosibirsk State University, 1 Pirogova str., Novosibirsk 630090, Russia"],"affiliations":[{"raw_affiliation_string":"Faculty of Mechanics and Mathematics, Novosibirsk State University, 1 Pirogova str., Novosibirsk 630090, Russia","institution_ids":["https://openalex.org/I188973947"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114850009","display_name":"Shakhboz Meylikulov","orcid":null},"institutions":[{"id":"https://openalex.org/I4210159709","display_name":"Termez State University","ror":"https://ror.org/05416zy49","country_code":"UZ","type":"education","lineage":["https://openalex.org/I4210159709"]}],"countries":["UZ"],"is_corresponding":false,"raw_author_name":"Shakhboz Meylikulov","raw_affiliation_strings":["Department of Information Technology and Exact Sciences, Termez University of Economics and Service, 38-B, Ibn-Sino str., Termez 190100, Uzbekistan"],"affiliations":[{"raw_affiliation_string":"Department of Information Technology and Exact Sciences, Termez University of Economics and Service, 38-B, Ibn-Sino str., Termez 190100, Uzbekistan","institution_ids":["https://openalex.org/I4210159709"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124788998","display_name":"Sultonbek Normamatov","orcid":null},"institutions":[{"id":"https://openalex.org/I3133440902","display_name":"Tashkent State University of Oriental Studies","ror":"https://ror.org/03d53xe41","country_code":"UZ","type":"education","lineage":["https://openalex.org/I3133440902"]}],"countries":["UZ"],"is_corresponding":false,"raw_author_name":"Sultonbek Normamatov","raw_affiliation_strings":["Department of Computer Linguistics and Digital Technologies, Faculty of Social and Humanitarian Sciences, Alisher Navo\u2032i Tashkent State University of Uzbek Language and Literature, 103, Yusuf Xos Khojib Str., Tashkent 100013, Uzbekistan"],"affiliations":[{"raw_affiliation_string":"Department of Computer Linguistics and Digital Technologies, Faculty of Social and Humanitarian Sciences, Alisher Navo\u2032i Tashkent State University of Uzbek Language and Literature, 103, Yusuf Xos Khojib Str., Tashkent 100013, Uzbekistan","institution_ids":["https://openalex.org/I3133440902"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124754778","display_name":"Feruza Bahodirova","orcid":null},"institutions":[{"id":"https://openalex.org/I14916453","display_name":"Urgench State University","ror":"https://ror.org/0593kfr97","country_code":"UZ","type":"education","lineage":["https://openalex.org/I14916453"]}],"countries":["UZ"],"is_corresponding":false,"raw_author_name":"Feruza Bahodirova","raw_affiliation_strings":["Department of Interfaculty Foreign Languages, Urgench State University, 14, Kh. Alimdjan str., Urgench 220100, Uzbekistan"],"affiliations":[{"raw_affiliation_string":"Department of Interfaculty Foreign Languages, Urgench State University, 14, Kh. Alimdjan str., Urgench 220100, Uzbekistan","institution_ids":["https://openalex.org/I14916453"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124788940","display_name":"Javlonbek Matnazarov","orcid":null},"institutions":[{"id":"https://openalex.org/I3123046734","display_name":"Lorestan University","ror":"https://ror.org/051bats05","country_code":"IR","type":"education","lineage":["https://openalex.org/I3123046734"]}],"countries":["IR"],"is_corresponding":false,"raw_author_name":"Javlonbek Matnazarov","raw_affiliation_strings":["Department of Language and Literature, Mamun University, 2, Bol-xovuz str., Khiva 220901, Uzbekistan"],"affiliations":[{"raw_affiliation_string":"Department of Language and Literature, Mamun University, 2, Bol-xovuz str., Khiva 220901, Uzbekistan","institution_ids":["https://openalex.org/I3123046734"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5114652201","display_name":"Zarnigor Fayzullaeva","orcid":null},"institutions":[{"id":"https://openalex.org/I21256249","display_name":"Tashkent University of Information Technology","ror":"https://ror.org/05987a729","country_code":"UZ","type":"education","lineage":["https://openalex.org/I21256249"]}],"countries":["UZ"],"is_corresponding":false,"raw_author_name":"Zarnigor Fayzullaeva","raw_affiliation_strings":["Department of Software Engineering, Tashkent University of Information Technologies Named After Muhammad al-Khwarizmi, Tashkent 100084, Uzbekistan"],"affiliations":[{"raw_affiliation_string":"Department of Software Engineering, Tashkent University of Information Technologies Named After Muhammad al-Khwarizmi, Tashkent 100084, Uzbekistan","institution_ids":["https://openalex.org/I21256249"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5121549953"],"corresponding_institution_ids":["https://openalex.org/I188973947"],"apc_list":{"value":1600,"currency":"CHF","value_usd":1732},"apc_paid":{"value":1600,"currency":"CHF","value_usd":1732},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.49868588,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"11","issue":"2","first_page":"28","last_page":"28"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.944100022315979,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.944100022315979,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.014100000262260437,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12488","display_name":"Mental Health via Writing","score":0.006000000052154064,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/emoji","display_name":"Emoji","score":0.8454999923706055},{"id":"https://openalex.org/keywords/sentiment-analysis","display_name":"Sentiment analysis","score":0.5498999953269958},{"id":"https://openalex.org/keywords/corpus-linguistics","display_name":"Corpus linguistics","score":0.5138999819755554},{"id":"https://openalex.org/keywords/comparability","display_name":"Comparability","score":0.44749999046325684},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.4341000020503998},{"id":"https://openalex.org/keywords/schema","display_name":"Schema (genetic algorithms)","score":0.43369999527931213},{"id":"https://openalex.org/keywords/computational-linguistics","display_name":"Computational linguistics","score":0.4185999929904938}],"concepts":[{"id":"https://openalex.org/C2779247141","wikidata":"https://www.wikidata.org/wiki/Q1049294","display_name":"Emoji","level":3,"score":0.8454999923706055},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8134999871253967},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.7731999754905701},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7149999737739563},{"id":"https://openalex.org/C66402592","wikidata":"https://www.wikidata.org/wiki/Q2271421","display_name":"Sentiment analysis","level":2,"score":0.5498999953269958},{"id":"https://openalex.org/C532629269","wikidata":"https://www.wikidata.org/wiki/Q865083","display_name":"Corpus linguistics","level":2,"score":0.5138999819755554},{"id":"https://openalex.org/C197947376","wikidata":"https://www.wikidata.org/wiki/Q5155608","display_name":"Comparability","level":2,"score":0.44749999046325684},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.4341000020503998},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.43369999527931213},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.4185999929904938},{"id":"https://openalex.org/C2779135771","wikidata":"https://www.wikidata.org/wiki/Q403574","display_name":"Named-entity recognition","level":3,"score":0.4104999899864197},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.3434000015258789},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.3312000036239624},{"id":"https://openalex.org/C2474386","wikidata":"https://www.wikidata.org/wiki/Q461183","display_name":"Text corpus","level":2,"score":0.30709999799728394},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.30559998750686646},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.26109999418258667},{"id":"https://openalex.org/C96711827","wikidata":"https://www.wikidata.org/wiki/Q17012245","display_name":"Entity linking","level":3,"score":0.25619998574256897},{"id":"https://openalex.org/C2779631151","wikidata":"https://www.wikidata.org/wiki/Q7883767","display_name":"Underspecification","level":2,"score":0.25029999017715454}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3390/data11020028","is_oa":true,"landing_page_url":"https://doi.org/10.3390/data11020028","pdf_url":null,"source":{"id":"https://openalex.org/S4210226510","display_name":"Data","issn_l":"2306-5729","issn":["2306-5729"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:360e8f33ff824fbba3d92532b238664f","is_oa":true,"landing_page_url":"https://doaj.org/article/360e8f33ff824fbba3d92532b238664f","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Data, Vol 11, Iss 2, p 28 (2026)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/data11020028","is_oa":true,"landing_page_url":"https://doi.org/10.3390/data11020028","pdf_url":null,"source":{"id":"https://openalex.org/S4210226510","display_name":"Data","issn_l":"2306-5729","issn":["2306-5729"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W2302501749","https://openalex.org/W3049404478","https://openalex.org/W4232922170","https://openalex.org/W4285124320","https://openalex.org/W4365517464","https://openalex.org/W4394859529","https://openalex.org/W4403309748","https://openalex.org/W4404740695","https://openalex.org/W4404790409","https://openalex.org/W4405586136","https://openalex.org/W4405787016","https://openalex.org/W4410735829","https://openalex.org/W4414199958","https://openalex.org/W7084129356"],"related_works":[],"abstract_inverted_index":{"This":[0],"data":[1],"descriptor":[2],"presents":[3],"two":[4],"fully":[5],"synthetic":[6,24],"corpora":[7,93],"for":[8],"sentiment":[9],"analysis":[10],"and":[11,41,99,102],"named":[12,35],"entity":[13],"recognition":[14],"(NER)":[15],"in":[16,87,96],"Uzbek.":[17],"The":[18,46,121],"first":[19],"corpus":[20,48],"contains":[21],"12,000":[22],"hybrid":[23],"sentences":[25,52],"generated":[26],"from":[27],"templates":[28],"with":[29],"lexical":[30],"randomization,":[31],"automatic":[32],"insertion":[33],"of":[34],"entities":[36],"(PER/ORG/LOC),":[37],"lexicon-based":[38],"polarity":[39],"scoring,":[40],"a":[42,74,104],"controlled":[43],"emoji":[44,76],"distribution.":[45],"second":[47],"includes":[49,73],"3000":[50],"\u201cmanual-style\u201d":[51],"designed":[53],"to":[54,67,84],"resemble":[55],"short,":[56],"naturally":[57],"structured":[58],"messages.":[59],"Although":[60],"the":[61,70],"manual-style":[62],"subset":[63],"was":[64],"initially":[65],"intended":[66],"be":[68],"emoji-free,":[69],"released":[71,95],"version":[72],"39.6%":[75],"presence":[77],"(sentences":[78],"containing":[79],"at":[80],"least":[81],"one":[82],"emoji)":[83],"maintain":[85],"comparability":[86],"emotional":[88],"markers":[89],"across":[90],"corpora.":[91],"Both":[92],"are":[94],"CSV,":[97],"XLSX,":[98],"JSONL":[100],"formats":[101],"share":[103],"unified":[105],"schema":[106],"(id,":[107],"text,":[108],"sentiment,":[109],"entities,":[110],"entity_type,":[111],"polarity_score,":[112],"polarity_source,":[113],"token_count,":[114],"emojis,":[115],"emoji_position,":[116],"emoji_sentiment,":[117],"conflict_flag,":[118],"sentiment_from_polarity_score,":[119],"split).":[120],"dataset":[122],"is":[123],"publicly":[124],"available":[125],"via":[126],"Mendeley":[127],"Data":[128],"(DOI:":[129],"10.17632/y2d5pcyrzz.3).":[130]},"counts_by_year":[],"updated_date":"2026-02-28T09:26:25.869077","created_date":"2026-02-03T00:00:00"}
