{"id":"https://openalex.org/W2482622595","doi":"https://doi.org/10.18653/v1/w16-6212","title":"Hierarchical Character-Word Models for Language Identification","display_name":"Hierarchical Character-Word Models for Language Identification","publication_year":2016,"publication_date":"2016-01-01","ids":{"openalex":"https://openalex.org/W2482622595","doi":"https://doi.org/10.18653/v1/w16-6212","mag":"2482622595"},"language":"en","primary_location":{"id":"doi:10.18653/v1/w16-6212","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w16-6212","pdf_url":"https://www.aclweb.org/anthology/W16-6212.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of The Fourth International Workshop on Natural Language\n          Processing for Social Media","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/W16-6212.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082970015","display_name":"Aaron Jaech","orcid":null},"institutions":[{"id":"https://openalex.org/I170201317","display_name":"University of Pittsburgh","ror":"https://ror.org/01an3r305","country_code":"US","type":"education","lineage":["https://openalex.org/I170201317"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Aaron Jaech","raw_affiliation_strings":["Electrical Engineering","University of Pittsburgh, Pittsburgh, United States"],"affiliations":[{"raw_affiliation_string":"Electrical Engineering","institution_ids":[]},{"raw_affiliation_string":"University of Pittsburgh, Pittsburgh, United States","institution_ids":["https://openalex.org/I170201317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071629800","display_name":"George Mulcaire","orcid":null},"institutions":[{"id":"https://openalex.org/I58610484","display_name":"Seattle University","ror":"https://ror.org/02jqc0m91","country_code":"US","type":"education","lineage":["https://openalex.org/I58610484"]},{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"George Mulcaire","raw_affiliation_strings":["Computer Science & Engineering University of Washington, Seattle, WA 98195, USA","University of Washington, Seattle, United States"],"affiliations":[{"raw_affiliation_string":"Computer Science & Engineering University of Washington, Seattle, WA 98195, USA","institution_ids":["https://openalex.org/I201448701"]},{"raw_affiliation_string":"University of Washington, Seattle, United States","institution_ids":["https://openalex.org/I58610484","https://openalex.org/I201448701"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080067076","display_name":"Shobhit Hathi","orcid":null},"institutions":[{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shobhit Hathi","raw_affiliation_strings":["Computer Science & Engineering University of Washington, Seattle, WA 98195, USA"],"affiliations":[{"raw_affiliation_string":"Computer Science & Engineering University of Washington, Seattle, WA 98195, USA","institution_ids":["https://openalex.org/I201448701"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087215613","display_name":"Mari Ostendorf","orcid":null},"institutions":[{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]},{"id":"https://openalex.org/I58610484","display_name":"Seattle University","ror":"https://ror.org/02jqc0m91","country_code":"US","type":"education","lineage":["https://openalex.org/I58610484"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mari Ostendorf","raw_affiliation_strings":["Electrical Engineering","University of Washington, Seattle, United States"],"affiliations":[{"raw_affiliation_string":"Electrical Engineering","institution_ids":[]},{"raw_affiliation_string":"University of Washington, Seattle, United States","institution_ids":["https://openalex.org/I58610484","https://openalex.org/I201448701"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088517824","display_name":"Noah A. Smith","orcid":"https://orcid.org/0000-0002-2310-6380"},"institutions":[{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]},{"id":"https://openalex.org/I58610484","display_name":"Seattle University","ror":"https://ror.org/02jqc0m91","country_code":"US","type":"education","lineage":["https://openalex.org/I58610484"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Noah A. Smith","raw_affiliation_strings":["Computer Science & Engineering University of Washington, Seattle, WA 98195, USA","University of Washington, Seattle, United States"],"affiliations":[{"raw_affiliation_string":"Computer Science & Engineering University of Washington, Seattle, WA 98195, USA","institution_ids":["https://openalex.org/I201448701"]},{"raw_affiliation_string":"University of Washington, Seattle, United States","institution_ids":["https://openalex.org/I58610484","https://openalex.org/I201448701"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5082970015"],"corresponding_institution_ids":["https://openalex.org/I170201317"],"apc_list":null,"apc_paid":null,"fwci":1.97313815,"has_fulltext":true,"cited_by_count":9,"citation_normalized_percentile":{"value":0.94089261,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"84","last_page":"93"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/character","display_name":"Character (mathematics)","score":0.8061281442642212},{"id":"https://openalex.org/keywords/spelling","display_name":"Spelling","score":0.7838131785392761},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7628939151763916},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.7609635591506958},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.7584618926048279},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6883716583251953},{"id":"https://openalex.org/keywords/language-identification","display_name":"Language identification","score":0.624485194683075},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6135045289993286},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5335657596588135},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.49460718035697937},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4869176149368286},{"id":"https://openalex.org/keywords/hierarchical-database-model","display_name":"Hierarchical database model","score":0.42791423201560974},{"id":"https://openalex.org/keywords/base","display_name":"Base (topology)","score":0.42754748463630676},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.23505112528800964},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.1062152087688446},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.07315140962600708}],"concepts":[{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.8061281442642212},{"id":"https://openalex.org/C2777801307","wikidata":"https://www.wikidata.org/wiki/Q2088390","display_name":"Spelling","level":2,"score":0.7838131785392761},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7628939151763916},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.7609635591506958},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.7584618926048279},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6883716583251953},{"id":"https://openalex.org/C129792486","wikidata":"https://www.wikidata.org/wiki/Q1050419","display_name":"Language identification","level":3,"score":0.624485194683075},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6135045289993286},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5335657596588135},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.49460718035697937},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4869176149368286},{"id":"https://openalex.org/C144986985","wikidata":"https://www.wikidata.org/wiki/Q871236","display_name":"Hierarchical database model","level":2,"score":0.42791423201560974},{"id":"https://openalex.org/C42058472","wikidata":"https://www.wikidata.org/wiki/Q810214","display_name":"Base (topology)","level":2,"score":0.42754748463630676},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.23505112528800964},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.1062152087688446},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.07315140962600708},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.18653/v1/w16-6212","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w16-6212","pdf_url":"https://www.aclweb.org/anthology/W16-6212.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of The Fourth International Workshop on Natural Language\n          Processing for Social Media","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1608.03030","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1608.03030","pdf_url":"https://arxiv.org/pdf/1608.03030","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"mag:2482622595","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1608.03030.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1608.03030","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1608.03030","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.18653/v1/w16-6212","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w16-6212","pdf_url":"https://www.aclweb.org/anthology/W16-6212.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of The Fourth International Workshop on Natural Language\n          Processing for Social Media","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.5299999713897705,"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306078","display_name":"U.S. Department of Defense","ror":"https://ror.org/0447fe631"},{"id":"https://openalex.org/F4320307943","display_name":"Raytheon Company","ror":"https://ror.org/0354t7b78"},{"id":"https://openalex.org/F4320310094","display_name":"University of Washington","ror":"https://ror.org/00cvxb145"},{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2482622595.pdf","grobid_xml":"https://content.openalex.org/works/W2482622595.grobid-xml"},"referenced_works_count":27,"referenced_works":["https://openalex.org/W179875071","https://openalex.org/W850909210","https://openalex.org/W1522301498","https://openalex.org/W1533946607","https://openalex.org/W1591801644","https://openalex.org/W1631260214","https://openalex.org/W1951325712","https://openalex.org/W2012603689","https://openalex.org/W2101609803","https://openalex.org/W2169200297","https://openalex.org/W2185720331","https://openalex.org/W2194775991","https://openalex.org/W2220350356","https://openalex.org/W2246212392","https://openalex.org/W2251149908","https://openalex.org/W2251681917","https://openalex.org/W2291291686","https://openalex.org/W2293634267","https://openalex.org/W2313437166","https://openalex.org/W2407436506","https://openalex.org/W2475268295","https://openalex.org/W2505556938","https://openalex.org/W2546302380","https://openalex.org/W2949563612","https://openalex.org/W2951336364","https://openalex.org/W2951559648","https://openalex.org/W2964308564"],"related_works":["https://openalex.org/W2963461183","https://openalex.org/W124635070","https://openalex.org/W3108387573","https://openalex.org/W3113462843","https://openalex.org/W2250548009","https://openalex.org/W3016030119","https://openalex.org/W2894420350","https://openalex.org/W2963899393","https://openalex.org/W2962739339","https://openalex.org/W2104071081","https://openalex.org/W2950391118","https://openalex.org/W2766727221","https://openalex.org/W3036954737","https://openalex.org/W3176125721","https://openalex.org/W3116885119","https://openalex.org/W2505232340","https://openalex.org/W2575428825","https://openalex.org/W2404244834","https://openalex.org/W2970397112","https://openalex.org/W2847671867"],"abstract_inverted_index":{"Social":[0],"media":[1],"messages'":[2],"brevity":[3],"and":[4,21,36],"unconventional":[5],"spelling":[6],"pose":[7],"a":[8,15],"challenge":[9],"to":[10],"language":[11,26],"identification.":[12,27],"We":[13],"introduce":[14],"hierarchical":[16],"model":[17],"that":[18],"learns":[19],"character":[20],"contextualized":[22],"word-level":[23],"representations":[24],"for":[25],"Our":[28],"method":[29],"performs":[30],"well":[31],"against":[32],"strong":[33],"base-":[34],"lines,":[35],"can":[37],"also":[38],"reveal":[39],"code-switching.":[40]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":3}],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
