{"id":"https://openalex.org/W2250662591","doi":"https://doi.org/10.3115/v1/w14-5314","title":"Using Maximum Entropy Models to Discriminate between Similar Languages and Varieties","display_name":"Using Maximum Entropy Models to Discriminate between Similar Languages and Varieties","publication_year":2014,"publication_date":"2014-01-01","ids":{"openalex":"https://openalex.org/W2250662591","doi":"https://doi.org/10.3115/v1/w14-5314","mag":"2250662591"},"language":"en","primary_location":{"id":"doi:10.3115/v1/w14-5314","is_oa":true,"landing_page_url":"https://doi.org/10.3115/v1/w14-5314","pdf_url":"https://aclanthology.org/W14-5314.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the First Workshop on Applying NLP Tools to Similar Languages, Varieties and Dialects","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/W14-5314.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5029379347","display_name":"Jordi Porta","orcid":"https://orcid.org/0000-0001-5620-4916"},"institutions":[{"id":"https://openalex.org/I1288451689","display_name":"Real Academia Espa\u00f1ola","ror":"https://ror.org/05t8ffp36","country_code":"ES","type":"education","lineage":["https://openalex.org/I1288451689"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Jordi Porta","raw_affiliation_strings":["Departamento de Tecnologa y Sistemas Centro de Estudios de la Real Academia Espaola c/ Serrano 187-189, 28002 Madrid","Departamento de Tecnolog\u00eda y Sistemas Centro de Estudios de la Real Academia Espa\u00f1ola c/ Serrano 187-189, 28002 Madrid"],"affiliations":[{"raw_affiliation_string":"Departamento de Tecnologa y Sistemas Centro de Estudios de la Real Academia Espaola c/ Serrano 187-189, 28002 Madrid","institution_ids":["https://openalex.org/I1288451689"]},{"raw_affiliation_string":"Departamento de Tecnolog\u00eda y Sistemas Centro de Estudios de la Real Academia Espa\u00f1ola c/ Serrano 187-189, 28002 Madrid","institution_ids":["https://openalex.org/I1288451689"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103204139","display_name":"Jos\u00e9 Luis Sancho","orcid":"https://orcid.org/0000-0002-2319-8641"},"institutions":[{"id":"https://openalex.org/I1288451689","display_name":"Real Academia Espa\u00f1ola","ror":"https://ror.org/05t8ffp36","country_code":"ES","type":"education","lineage":["https://openalex.org/I1288451689"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Jos\u00e9-Luis Sancho","raw_affiliation_strings":["Departamento de Tecnologa y Sistemas Centro de Estudios de la Real Academia Espaola c/ Serrano 187-189, 28002 Madrid","Departamento de Tecnolog\u00eda y Sistemas Centro de Estudios de la Real Academia Espa\u00f1ola c/ Serrano 187-189, 28002 Madrid"],"affiliations":[{"raw_affiliation_string":"Departamento de Tecnologa y Sistemas Centro de Estudios de la Real Academia Espaola c/ Serrano 187-189, 28002 Madrid","institution_ids":["https://openalex.org/I1288451689"]},{"raw_affiliation_string":"Departamento de Tecnolog\u00eda y Sistemas Centro de Estudios de la Real Academia Espa\u00f1ola c/ Serrano 187-189, 28002 Madrid","institution_ids":["https://openalex.org/I1288451689"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5029379347"],"corresponding_institution_ids":["https://openalex.org/I1288451689"],"apc_list":null,"apc_paid":null,"fwci":3.8057,"has_fulltext":true,"cited_by_count":20,"citation_normalized_percentile":{"value":0.94065894,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"120","last_page":"128"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9747999906539917,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7208664417266846},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.6645307540893555},{"id":"https://openalex.org/keywords/principle-of-maximum-entropy","display_name":"Principle of maximum entropy","score":0.6536554098129272},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6415732502937317},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.525766909122467},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.45517873764038086},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.4359922707080841}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7208664417266846},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.6645307540893555},{"id":"https://openalex.org/C9679016","wikidata":"https://www.wikidata.org/wiki/Q1417473","display_name":"Principle of maximum entropy","level":2,"score":0.6536554098129272},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6415732502937317},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.525766909122467},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.45517873764038086},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.4359922707080841},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3115/v1/w14-5314","is_oa":true,"landing_page_url":"https://doi.org/10.3115/v1/w14-5314","pdf_url":"https://aclanthology.org/W14-5314.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the First Workshop on Applying NLP Tools to Similar Languages, Varieties and Dialects","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.678.7064","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.678.7064","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://anthology.aclweb.org/W/W14/W14-5314.pdf","raw_type":"text"}],"best_oa_location":{"id":"doi:10.3115/v1/w14-5314","is_oa":true,"landing_page_url":"https://doi.org/10.3115/v1/w14-5314","pdf_url":"https://aclanthology.org/W14-5314.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the First Workshop on Applying NLP Tools to Similar Languages, Varieties and Dialects","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.7099999785423279}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2250662591.pdf","grobid_xml":"https://content.openalex.org/works/W2250662591.grobid-xml"},"referenced_works_count":26,"referenced_works":["https://openalex.org/W244375653","https://openalex.org/W402546610","https://openalex.org/W968157591","https://openalex.org/W1482098013","https://openalex.org/W1532342735","https://openalex.org/W1533946607","https://openalex.org/W1571735237","https://openalex.org/W1604270933","https://openalex.org/W1951381097","https://openalex.org/W1968411793","https://openalex.org/W1969005071","https://openalex.org/W2001792610","https://openalex.org/W2014077556","https://openalex.org/W2051461667","https://openalex.org/W2095944907","https://openalex.org/W2096175520","https://openalex.org/W2140432583","https://openalex.org/W2160842254","https://openalex.org/W2162019804","https://openalex.org/W2169200297","https://openalex.org/W2181262297","https://openalex.org/W2250174871","https://openalex.org/W2250185284","https://openalex.org/W2250271359","https://openalex.org/W2561254266","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W4388335561","https://openalex.org/W2970530566","https://openalex.org/W4288261899","https://openalex.org/W4307309205","https://openalex.org/W2967478618","https://openalex.org/W4385009901","https://openalex.org/W4385572700","https://openalex.org/W2997152889","https://openalex.org/W3041490575","https://openalex.org/W2970690932"],"abstract_inverted_index":{"DSLRAE":[0],"is":[1,22,42],"a":[2,25,29,38,68],"hierarchical":[3],"classifier":[4,41,66],"for":[5],"similar":[6],"written":[7],"languages":[8,52],"and":[9,71,80],"varieties":[10,54],"based":[11],"on":[12],"maximum-entropy":[13],"(maxent)":[14],"classifiers.":[15],"In":[16],"the":[17,20,35,46,51,56,65,90,97,112],"first":[18],"level,":[19,37],"text":[21,47],"classified":[23],"into":[24],"language":[26],"group":[27,62],"using":[28],"simple":[30],"token-based":[31],"maxent":[32,40],"classifier.":[33],"At":[34],"second":[36,110],"group-specific":[39],"applied":[43],"to":[44,89],"classify":[45],"as":[48],"one":[49],"of":[50,63,73,83,92],"or":[53,77],"within":[55],"previously":[57],"identified":[58],"group.":[59],"For":[60],"each":[61],"languages,":[64],"uses":[67],"different":[69],"kind":[70],"combination":[72],"knowledge-poor":[74],"features:":[75],"token":[76],"character":[78],"n-grams":[79],"'white":[81],"lists'":[82],"tokens.":[84],"Features":[85],"were":[86],"selected":[87],"according":[88],"results":[91],"applying":[93],"ten-fold":[94],"cross-validation":[95],"over":[96],"training":[98],"dataset.":[99],"The":[100],"system":[101],"presented":[102],"in":[103,111],"this":[104],"article":[105],"1":[106],"has":[107],"been":[108],"ranked":[109],"Discriminating":[113],"Similar":[114],"Language":[115],"(DSL)":[116],"shared":[117],"task":[118],"co-located":[119]},"counts_by_year":[{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":2},{"year":2016,"cited_by_count":3},{"year":2015,"cited_by_count":3},{"year":2014,"cited_by_count":3}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
