{"id":"https://openalex.org/W2741795463","doi":"https://doi.org/10.18653/v1/w17-1217","title":"Discriminating between Similar Languages Using a Combination of Typed and Untyped Character N-grams and Words","display_name":"Discriminating between Similar Languages Using a Combination of Typed and Untyped Character N-grams and Words","publication_year":2017,"publication_date":"2017-01-01","ids":{"openalex":"https://openalex.org/W2741795463","doi":"https://doi.org/10.18653/v1/w17-1217","mag":"2741795463"},"language":"en","primary_location":{"id":"doi:10.18653/v1/w17-1217","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w17-1217","pdf_url":"https://www.aclweb.org/anthology/W17-1217.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Fourth Workshop on NLP for Similar Languages,\n          Varieties and Dialects (VarDial)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/W17-1217.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5013558219","display_name":"Helena G\u00f3mez-Adorno","orcid":"https://orcid.org/0000-0002-6966-9912"},"institutions":[{"id":"https://openalex.org/I59361560","display_name":"Instituto Polit\u00e9cnico Nacional","ror":"https://ror.org/059sp8j34","country_code":"MX","type":"education","lineage":["https://openalex.org/I59361560"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Helena Gomez","raw_affiliation_strings":["Instituto Polit\u00e9cnico Nacional, Center for Computing Research, Av. Juan de Dios B\u00e1tiz, C.P. 07738, Mexico City, Mexico"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Instituto Polit\u00e9cnico Nacional, Center for Computing Research, Av. Juan de Dios B\u00e1tiz, C.P. 07738, Mexico City, Mexico","institution_ids":["https://openalex.org/I59361560"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051243807","display_name":"Ilia Markov","orcid":"https://orcid.org/0000-0001-9533-748X"},"institutions":[{"id":"https://openalex.org/I59361560","display_name":"Instituto Polit\u00e9cnico Nacional","ror":"https://ror.org/059sp8j34","country_code":"MX","type":"education","lineage":["https://openalex.org/I59361560"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Ilia Markov","raw_affiliation_strings":["Instituto Politcnico Nacional, Center for Computing Research, Av. Juan de Dios Btiz, C.P. 07738, Mexico City, Mexico","Instituto Polit\u00e9cnico Nacional, Center for Computing Research, Av. Juan de Dios B\u00e1tiz, C.P. 07738, Mexico City, Mexico"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Instituto Politcnico Nacional, Center for Computing Research, Av. Juan de Dios Btiz, C.P. 07738, Mexico City, Mexico","institution_ids":["https://openalex.org/I59361560"]},{"raw_affiliation_string":"Instituto Polit\u00e9cnico Nacional, Center for Computing Research, Av. Juan de Dios B\u00e1tiz, C.P. 07738, Mexico City, Mexico","institution_ids":["https://openalex.org/I59361560"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059492029","display_name":"Jorge Baptista","orcid":"https://orcid.org/0000-0003-4603-4364"},"institutions":[{"id":"https://openalex.org/I121345201","display_name":"Instituto de Engenharia de Sistemas e Computadores Investiga\u00e7\u00e3o e Desenvolvimento","ror":"https://ror.org/04mqy3p58","country_code":"PT","type":"nonprofit","lineage":["https://openalex.org/I121345201","https://openalex.org/I4210125590"]},{"id":"https://openalex.org/I71503853","display_name":"University of Algarve","ror":"https://ror.org/014g34x36","country_code":"PT","type":"education","lineage":["https://openalex.org/I71503853"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Jorge Baptista","raw_affiliation_strings":["Universidade do Algarve/FCHS and INESC-ID Lisboa/L2F, Campus de Gambelas, P-8005-139, Faro, Portugal"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Universidade do Algarve/FCHS and INESC-ID Lisboa/L2F, Campus de Gambelas, P-8005-139, Faro, Portugal","institution_ids":["https://openalex.org/I71503853","https://openalex.org/I121345201"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008287867","display_name":"Grigori Sidorov","orcid":"https://orcid.org/0000-0003-3901-3522"},"institutions":[{"id":"https://openalex.org/I59361560","display_name":"Instituto Polit\u00e9cnico Nacional","ror":"https://ror.org/059sp8j34","country_code":"MX","type":"education","lineage":["https://openalex.org/I59361560"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Grigori Sidorov","raw_affiliation_strings":["Instituto Politcnico Nacional, Center for Computing Research, Av. Juan de Dios Btiz, C.P. 07738, Mexico City, Mexico","Instituto Polit\u00e9cnico Nacional, Center for Computing Research, Av. Juan de Dios B\u00e1tiz, C.P. 07738, Mexico City, Mexico"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Instituto Politcnico Nacional, Center for Computing Research, Av. Juan de Dios Btiz, C.P. 07738, Mexico City, Mexico","institution_ids":["https://openalex.org/I59361560"]},{"raw_affiliation_string":"Instituto Polit\u00e9cnico Nacional, Center for Computing Research, Av. Juan de Dios B\u00e1tiz, C.P. 07738, Mexico City, Mexico","institution_ids":["https://openalex.org/I59361560"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5016612157","display_name":"David Pinto","orcid":"https://orcid.org/0000-0002-8516-5925"},"institutions":[{"id":"https://openalex.org/I4210111321","display_name":"Universidad de Puebla","ror":"https://ror.org/02rsx0d74","country_code":"MX","type":"education","lineage":["https://openalex.org/I4210111321"]},{"id":"https://openalex.org/I721619","display_name":"Benem\u00e9rita Universidad Aut\u00f3noma de Puebla","ror":"https://ror.org/03p2z7827","country_code":"MX","type":"education","lineage":["https://openalex.org/I721619"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"David Pinto","raw_affiliation_strings":["Benemrita Universidad Autnoma de Puebla, Faculty of Computer Science, Av. San Claudio y 14 Sur, C.P. 72570, Puebla, Mexico","Benem\u00e9rita Universidad Aut\u00f3noma de Puebla, Faculty of Computer Science, Av. San Claudio y 14 Sur, C.P. 72570, Puebla, Mexico"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Benemrita Universidad Autnoma de Puebla, Faculty of Computer Science, Av. San Claudio y 14 Sur, C.P. 72570, Puebla, Mexico","institution_ids":["https://openalex.org/I721619","https://openalex.org/I4210111321"]},{"raw_affiliation_string":"Benem\u00e9rita Universidad Aut\u00f3noma de Puebla, Faculty of Computer Science, Av. San Claudio y 14 Sur, C.P. 72570, Puebla, Mexico","institution_ids":["https://openalex.org/I721619"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.2719,"has_fulltext":true,"cited_by_count":13,"citation_normalized_percentile":{"value":0.91014765,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"137","last_page":"145"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13959","display_name":"Swearing, Euphemism, Multilingualism","score":0.9398000240325928,"subfield":{"id":"https://openalex.org/subfields/3315","display_name":"Communication"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/character","display_name":"Character (mathematics)","score":0.8588314652442932},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7267641425132751},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.6062207221984863},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5346981883049011},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.36866116523742676},{"id":"https://openalex.org/keywords/arithmetic","display_name":"Arithmetic","score":0.3682723641395569},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.3391178846359253},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.20166486501693726},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.05651223659515381}],"concepts":[{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.8588314652442932},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7267641425132751},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.6062207221984863},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5346981883049011},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36866116523742676},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.3682723641395569},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.3391178846359253},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.20166486501693726},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.05651223659515381},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/w17-1217","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w17-1217","pdf_url":"https://www.aclweb.org/anthology/W17-1217.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Fourth Workshop on NLP for Similar Languages,\n          Varieties and Dialects (VarDial)","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/w17-1217","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w17-1217","pdf_url":"https://www.aclweb.org/anthology/W17-1217.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Fourth Workshop on NLP for Similar Languages,\n          Varieties and Dialects (VarDial)","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10","score":0.7200000286102295}],"awards":[{"id":"https://openalex.org/G2041513338","display_name":null,"funder_award_id":"20162064","funder_id":"https://openalex.org/F4320324217","funder_display_name":"Comisi\u00f3n de Operaci\u00f3n y Fomento de Actividades Acad\u00e9micas, Instituto Polit\u00e9cnico Nacional"},{"id":"https://openalex.org/G309696157","display_name":null,"funder_award_id":"UID/CEC/50021/2013","funder_id":"https://openalex.org/F4320334779","funder_display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia"},{"id":"https://openalex.org/G3449656711","display_name":null,"funder_award_id":"240844","funder_id":"https://openalex.org/F4320324217","funder_display_name":"Comisi\u00f3n de Operaci\u00f3n y Fomento de Actividades Acad\u00e9micas, Instituto Polit\u00e9cnico Nacional"},{"id":"https://openalex.org/G3736033244","display_name":null,"funder_award_id":"20162204","funder_id":"https://openalex.org/F4320324217","funder_display_name":"Comisi\u00f3n de Operaci\u00f3n y Fomento de Actividades Acad\u00e9micas, Instituto Polit\u00e9cnico Nacional"},{"id":"https://openalex.org/G4399037731","display_name":null,"funder_award_id":"20161947","funder_id":"https://openalex.org/F4320324217","funder_display_name":"Comisi\u00f3n de Operaci\u00f3n y Fomento de Actividades Acad\u00e9micas, Instituto Polit\u00e9cnico Nacional"},{"id":"https://openalex.org/G6134525641","display_name":null,"funder_award_id":"20151589","funder_id":"https://openalex.org/F4320324217","funder_display_name":"Comisi\u00f3n de Operaci\u00f3n y Fomento de Actividades Acad\u00e9micas, Instituto Polit\u00e9cnico Nacional"},{"id":"https://openalex.org/G6391027779","display_name":null,"funder_award_id":"20161958","funder_id":"https://openalex.org/F4320324217","funder_display_name":"Comisi\u00f3n de Operaci\u00f3n y Fomento de Actividades Acad\u00e9micas, Instituto Polit\u00e9cnico Nacional"}],"funders":[{"id":"https://openalex.org/F4320324217","display_name":"Comisi\u00f3n de Operaci\u00f3n y Fomento de Actividades Acad\u00e9micas, Instituto Polit\u00e9cnico Nacional","ror":"https://ror.org/059sp8j34"},{"id":"https://openalex.org/F4320334779","display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia","ror":"https://ror.org/00snfqn58"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2741795463.pdf","grobid_xml":"https://content.openalex.org/works/W2741795463.grobid-xml"},"referenced_works_count":31,"referenced_works":["https://openalex.org/W244375653","https://openalex.org/W1565201084","https://openalex.org/W1570448133","https://openalex.org/W1981399499","https://openalex.org/W2162019804","https://openalex.org/W2250185284","https://openalex.org/W2251071163","https://openalex.org/W2295585256","https://openalex.org/W2399426317","https://openalex.org/W2527936838","https://openalex.org/W2528490756","https://openalex.org/W2553512497","https://openalex.org/W2558611208","https://openalex.org/W2561747913","https://openalex.org/W2571571071","https://openalex.org/W2577176677","https://openalex.org/W2583190795","https://openalex.org/W2588336572","https://openalex.org/W2620806258","https://openalex.org/W2740860101","https://openalex.org/W2785748711","https://openalex.org/W2894737615","https://openalex.org/W2949952663","https://openalex.org/W2955901475","https://openalex.org/W2964078312","https://openalex.org/W3087839522","https://openalex.org/W3088633338","https://openalex.org/W3088953058","https://openalex.org/W4237155282","https://openalex.org/W4285719527","https://openalex.org/W4299312355"],"related_works":["https://openalex.org/W2135396778","https://openalex.org/W4243252198","https://openalex.org/W2109507516","https://openalex.org/W1527862632","https://openalex.org/W2118300983","https://openalex.org/W2112962394","https://openalex.org/W3137189469","https://openalex.org/W2740990710","https://openalex.org/W4235530921","https://openalex.org/W2008310423"],"abstract_inverted_index":{"This":[0,25],"paper":[1],"presents":[2],"the":[3,11,21,64,89,125],"CIC":[4],"UALG's":[5],"system":[6],"that":[7],"took":[8],"part":[9],"in":[10,88,124],"Discriminating":[12],"between":[13],"Similar":[14],"Languages":[15],"(DSL)":[16],"shared":[17],"task,":[18],"held":[19],"at":[20,29],"VarDial":[22],"2017":[23],"Workshop.":[24],"year's":[26],"task":[27,127],"aims":[28],"identifying":[30],"14":[31],"languages":[32,62],"across":[33],"6":[34],"language":[35],"groups":[36],"using":[37],"a":[38,50,56],"corpus":[39],"of":[40,42,73],"excerpts":[41],"journalistic":[43],"texts.":[44],"Two":[45],"classification":[46],"approaches":[47],"were":[48,93],"compared:":[49],"single-step":[51],"(all":[52],"languages)":[53],"approach":[54],"and":[55,60,75,102,109,116],"two-step":[57],"(language":[58],"group":[59],"then":[61],"within":[63],"group)":[65],"approach.":[66],"Features":[67],"exploited":[68],"include":[69],"lexical":[70],"features":[71],"(unigrams":[72],"words)":[74],"character":[76,81,86],"n-grams.":[77],"Besides":[78],"traditional":[79],"(untyped)":[80],"n-grams,":[82],"we":[83],"introduce":[84],"typed":[85],"n-grams":[87],"DSL":[90,126],"task.":[91],"Experiments":[92],"carried":[94],"out":[95],"with":[96],"different":[97],"feature":[98],"representation":[99],"methods":[100],"(binary":[101],"raw":[103],"term":[104],"frequency),":[105],"frequency":[106],"threshold":[107],"values,":[108],"machine-learning":[110],"algorithms":[111],"-Support":[112],"Vector":[113],"Machines":[114],"(SVM)":[115],"Multinomial":[117],"Naive":[118],"Bayes":[119],"(MNB).":[120],"Our":[121],"best":[122],"run":[123],"achieved":[128],"91.46%":[129],"accuracy.":[130]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":5},{"year":2017,"cited_by_count":5}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
