{"id":"https://openalex.org/W3109540321","doi":"https://doi.org/10.1017/s1351324920000492","title":"Natural language processing for similar languages, varieties, and dialects: A survey","display_name":"Natural language processing for similar languages, varieties, and dialects: A survey","publication_year":2020,"publication_date":"2020-11-01","ids":{"openalex":"https://openalex.org/W3109540321","doi":"https://doi.org/10.1017/s1351324920000492","mag":"3109540321"},"language":"en","primary_location":{"id":"doi:10.1017/s1351324920000492","is_oa":false,"landing_page_url":"https://doi.org/10.1017/s1351324920000492","pdf_url":null,"source":{"id":"https://openalex.org/S18088403","display_name":"Natural Language Engineering","issn_l":"1351-3249","issn":["1351-3249","1469-8110"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Natural Language Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://hdl.handle.net/10138/330117","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5024937008","display_name":"Marcos Zampieri","orcid":"https://orcid.org/0000-0002-2346-3847"},"institutions":[{"id":"https://openalex.org/I155173764","display_name":"Rochester Institute of Technology","ror":"https://ror.org/00v4yb702","country_code":"US","type":"education","lineage":["https://openalex.org/I155173764"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Marcos Zampieri","raw_affiliation_strings":["Rochester Institute of Technology, USA"],"affiliations":[{"raw_affiliation_string":"Rochester Institute of Technology, USA","institution_ids":["https://openalex.org/I155173764"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012055259","display_name":"Preslav Nakov","orcid":"https://orcid.org/0000-0002-3600-1510"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Preslav Nakov","raw_affiliation_strings":["Qatar Computing Research Institute, HBKU, Qatar"],"affiliations":[{"raw_affiliation_string":"Qatar Computing Research Institute, HBKU, Qatar","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5063672760","display_name":"Yves Scherrer","orcid":"https://orcid.org/0000-0001-5247-5073"},"institutions":[{"id":"https://openalex.org/I133731052","display_name":"University of Helsinki","ror":"https://ror.org/040af2s02","country_code":"FI","type":"education","lineage":["https://openalex.org/I133731052"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Yves Scherrer","raw_affiliation_strings":["University of Helsinki, Finland"],"affiliations":[{"raw_affiliation_string":"University of Helsinki, Finland","institution_ids":["https://openalex.org/I133731052"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5024937008"],"corresponding_institution_ids":["https://openalex.org/I155173764"],"apc_list":null,"apc_paid":null,"fwci":3.5657,"has_fulltext":false,"cited_by_count":44,"citation_normalized_percentile":{"value":0.94135049,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"26","issue":"6","first_page":"595","last_page":"612"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9944000244140625,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8739959001541138},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6689655780792236},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5999786853790283},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.5962953567504883},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.5430067181587219},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.5062167048454285},{"id":"https://openalex.org/keywords/computational-linguistics","display_name":"Computational linguistics","score":0.4897732734680176},{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.47190696001052856},{"id":"https://openalex.org/keywords/language-identification","display_name":"Language identification","score":0.45842471718788147},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.4519875943660736},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4143625497817993},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.3680141568183899},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.12839210033416748}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8739959001541138},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6689655780792236},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5999786853790283},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.5962953567504883},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.5430067181587219},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.5062167048454285},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.4897732734680176},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.47190696001052856},{"id":"https://openalex.org/C129792486","wikidata":"https://www.wikidata.org/wiki/Q1050419","display_name":"Language identification","level":3,"score":0.45842471718788147},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.4519875943660736},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4143625497817993},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.3680141568183899},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.12839210033416748},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1017/s1351324920000492","is_oa":false,"landing_page_url":"https://doi.org/10.1017/s1351324920000492","pdf_url":null,"source":{"id":"https://openalex.org/S18088403","display_name":"Natural Language Engineering","issn_l":"1351-3249","issn":["1351-3249","1469-8110"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Natural Language Engineering","raw_type":"journal-article"},{"id":"pmh:oai:helda.helsinki.fi:10138/330117","is_oa":true,"landing_page_url":"http://hdl.handle.net/10138/330117","pdf_url":"http://hdl.handle.net/10138/330117","source":{"id":"https://openalex.org/S4210213322","display_name":"Ty\u00f6v\u00e4entutkimus Vuosikirja","issn_l":"0784-1272","issn":["0784-1272","1459-7780"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"}],"best_oa_location":{"id":"pmh:oai:helda.helsinki.fi:10138/330117","is_oa":true,"landing_page_url":"http://hdl.handle.net/10138/330117","pdf_url":"http://hdl.handle.net/10138/330117","source":{"id":"https://openalex.org/S4210213322","display_name":"Ty\u00f6v\u00e4entutkimus Vuosikirja","issn_l":"0784-1272","issn":["0784-1272","1459-7780"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.7300000190734863}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W3109540321.pdf"},"referenced_works_count":165,"referenced_works":["https://openalex.org/W137989762","https://openalex.org/W163214261","https://openalex.org/W181901128","https://openalex.org/W244375653","https://openalex.org/W331019419","https://openalex.org/W380666445","https://openalex.org/W925548373","https://openalex.org/W968157591","https://openalex.org/W1490721004","https://openalex.org/W1510819241","https://openalex.org/W1519942606","https://openalex.org/W1532342735","https://openalex.org/W1544567521","https://openalex.org/W1591532069","https://openalex.org/W1818534184","https://openalex.org/W1971273973","https://openalex.org/W1973279400","https://openalex.org/W1994581546","https://openalex.org/W2006830263","https://openalex.org/W2013526403","https://openalex.org/W2014077556","https://openalex.org/W2022355176","https://openalex.org/W2042987798","https://openalex.org/W2079442239","https://openalex.org/W2095944907","https://openalex.org/W2103195385","https://openalex.org/W2108508979","https://openalex.org/W2109704865","https://openalex.org/W2114579619","https://openalex.org/W2115295410","https://openalex.org/W2116599427","https://openalex.org/W2117717100","https://openalex.org/W2119821739","https://openalex.org/W2126725946","https://openalex.org/W2132609289","https://openalex.org/W2133564696","https://openalex.org/W2135598948","https://openalex.org/W2138122637","https://openalex.org/W2140432583","https://openalex.org/W2143995218","https://openalex.org/W2144571677","https://openalex.org/W2146867136","https://openalex.org/W2152691628","https://openalex.org/W2156554947","https://openalex.org/W2157331557","https://openalex.org/W2160802179","https://openalex.org/W2160817315","https://openalex.org/W2162019804","https://openalex.org/W2163108352","https://openalex.org/W2163942301","https://openalex.org/W2168677000","https://openalex.org/W2168915676","https://openalex.org/W2183864645","https://openalex.org/W2250174871","https://openalex.org/W2250278916","https://openalex.org/W2250320249","https://openalex.org/W2250457198","https://openalex.org/W2250783208","https://openalex.org/W2250877327","https://openalex.org/W2251054663","https://openalex.org/W2251227481","https://openalex.org/W2251862917","https://openalex.org/W2251986002","https://openalex.org/W2258701653","https://openalex.org/W2308338022","https://openalex.org/W2321087410","https://openalex.org/W2399058661","https://openalex.org/W2460907386","https://openalex.org/W2498287611","https://openalex.org/W2527936838","https://openalex.org/W2550821151","https://openalex.org/W2553512497","https://openalex.org/W2561747913","https://openalex.org/W2573442104","https://openalex.org/W2573786697","https://openalex.org/W2575880437","https://openalex.org/W2579343286","https://openalex.org/W2620806258","https://openalex.org/W2624871570","https://openalex.org/W2625431998","https://openalex.org/W2626499725","https://openalex.org/W2626778328","https://openalex.org/W2626786257","https://openalex.org/W2649800034","https://openalex.org/W2739575608","https://openalex.org/W2739708404","https://openalex.org/W2740106139","https://openalex.org/W2740358818","https://openalex.org/W2752630748","https://openalex.org/W2753894810","https://openalex.org/W2757774942","https://openalex.org/W2765448416","https://openalex.org/W2776928811","https://openalex.org/W2806962830","https://openalex.org/W2807654876","https://openalex.org/W2809456172","https://openalex.org/W2810144967","https://openalex.org/W2896457183","https://openalex.org/W2899166839","https://openalex.org/W2911944143","https://openalex.org/W2914120296","https://openalex.org/W2919290281","https://openalex.org/W2936069018","https://openalex.org/W2938765499","https://openalex.org/W2947379851","https://openalex.org/W2948506235","https://openalex.org/W2949888546","https://openalex.org/W2956292147","https://openalex.org/W2962723321","https://openalex.org/W2962784628","https://openalex.org/W2962937786","https://openalex.org/W2963088995","https://openalex.org/W2963118869","https://openalex.org/W2963132462","https://openalex.org/W2963250244","https://openalex.org/W2963310255","https://openalex.org/W2963323103","https://openalex.org/W2963486098","https://openalex.org/W2963490498","https://openalex.org/W2963499843","https://openalex.org/W2963514830","https://openalex.org/W2964015966","https://openalex.org/W2964067989","https://openalex.org/W2964085300","https://openalex.org/W2964308564","https://openalex.org/W2964842416","https://openalex.org/W2970513828","https://openalex.org/W2971120622","https://openalex.org/W2972619139","https://openalex.org/W2973088264","https://openalex.org/W2995118574","https://openalex.org/W3099559062","https://openalex.org/W3103849166","https://openalex.org/W3118973694","https://openalex.org/W3146312150","https://openalex.org/W3170253630","https://openalex.org/W3202903417","https://openalex.org/W3212303322","https://openalex.org/W4237155282","https://openalex.org/W4239510810","https://openalex.org/W4285719527","https://openalex.org/W4295678708","https://openalex.org/W6605563572","https://openalex.org/W6606688363","https://openalex.org/W6630391076","https://openalex.org/W6631707685","https://openalex.org/W6635373843","https://openalex.org/W6679436768","https://openalex.org/W6681262047","https://openalex.org/W6683102685","https://openalex.org/W6690971519","https://openalex.org/W6691208988","https://openalex.org/W6691363017","https://openalex.org/W6729896516","https://openalex.org/W6732164646","https://openalex.org/W6739709963","https://openalex.org/W6739901393","https://openalex.org/W6752073440","https://openalex.org/W6758129091","https://openalex.org/W6765510844","https://openalex.org/W6767737316","https://openalex.org/W6993078076","https://openalex.org/W7029708357","https://openalex.org/W7055927844","https://openalex.org/W7061722477"],"related_works":["https://openalex.org/W2293063786","https://openalex.org/W226586525","https://openalex.org/W2911292476","https://openalex.org/W2362145681","https://openalex.org/W2913520953","https://openalex.org/W1538826769","https://openalex.org/W4310801723","https://openalex.org/W2383292628","https://openalex.org/W1543103045","https://openalex.org/W2067569787"],"abstract_inverted_index":{"Abstract":[0],"There":[1],"has":[2],"been":[3],"a":[4,112],"lot":[5],"of":[6,20,32,51,83,89,114],"recent":[7],"interest":[8],"in":[9,16],"the":[10,17,26,30,69,84,87,93],"natural":[11],"language":[12,21,77,131,142,153],"processing":[13,19,59],"(NLP)":[14],"community":[15],"computational":[18,56,117],"varieties":[22],"and":[23,40,63,79,92,107,127,133,143,146,155],"dialects,":[24],"with":[25,53,75],"aim":[27],"to":[28,46,100],"improve":[29],"performance":[31],"applications":[33,139],"such":[34,140],"as":[35,141],"machine":[36,147],"translation,":[37],"speech":[38],"recognition,":[39],"dialogue":[41],"systems.":[42],"Here,":[43],"we":[44,67,80,136],"attempt":[45],"survey":[47],"this":[48],"growing":[49],"field":[50],"research,":[52],"focus":[54],"on":[55,116],"methods":[57,118],"for":[58,103,122,149],"similar":[60,104,129],"languages,":[61,105,130,152],"varieties,":[62,106,132,154],"dialects.":[64,108,134,156],"In":[65],"particular,":[66],"discuss":[68,137],"most":[70,94],"important":[71],"challenges":[72],"when":[73],"dealing":[74],"diatopic":[76],"variation,":[78],"present":[81,111],"some":[82],"available":[85],"datasets,":[86],"process":[88],"data":[90,96],"collection,":[91],"common":[95],"collection":[97],"strategies":[98],"used":[99],"compile":[101],"datasets":[102],"We":[109],"further":[110],"number":[113],"studies":[115],"developed":[119],"and/or":[120],"adapted":[121],"preprocessing,":[123],"normalization,":[124],"part-of-speech":[125],"tagging,":[126],"parsing":[128],"Finally,":[135],"relevant":[138],"dialect":[144],"identification":[145],"translation":[148],"closely":[150],"related":[151]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":10},{"year":2023,"cited_by_count":10},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":9},{"year":2020,"cited_by_count":2}],"updated_date":"2026-03-18T14:38:29.013473","created_date":"2025-10-10T00:00:00"}
