{"id":"https://openalex.org/W2515412299","doi":"https://doi.org/10.18653/v1/w16-2112","title":"Combining Phonology and Morphology for the Normalization of Historical Texts","display_name":"Combining Phonology and Morphology for the Normalization of Historical Texts","publication_year":2016,"publication_date":"2016-01-01","ids":{"openalex":"https://openalex.org/W2515412299","doi":"https://doi.org/10.18653/v1/w16-2112","mag":"2515412299"},"language":"en","primary_location":{"id":"doi:10.18653/v1/w16-2112","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w16-2112","pdf_url":"https://www.aclweb.org/anthology/W16-2112.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 10th SIGHUM Workshop on Language Technology for\n          Cultural Heritage, Social Sciences, and Humanities","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/W16-2112.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101652723","display_name":"Izaskun Etxeberria","orcid":"https://orcid.org/0000-0002-8251-7411"},"institutions":[{"id":"https://openalex.org/I169108374","display_name":"University of the Basque Country","ror":"https://ror.org/000xsnr85","country_code":"ES","type":"education","lineage":["https://openalex.org/I169108374"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Izaskun Etxeberria","raw_affiliation_strings":["IXA taldea, UPV-EHU"],"affiliations":[{"raw_affiliation_string":"IXA taldea, UPV-EHU","institution_ids":["https://openalex.org/I169108374"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027820865","display_name":"I\u00f1aki Alegria","orcid":"https://orcid.org/0000-0002-0272-1472"},"institutions":[{"id":"https://openalex.org/I169108374","display_name":"University of the Basque Country","ror":"https://ror.org/000xsnr85","country_code":"ES","type":"education","lineage":["https://openalex.org/I169108374"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"I\u00f1aki Alegria","raw_affiliation_strings":["IXA taldea, UPV-EHU"],"affiliations":[{"raw_affiliation_string":"IXA taldea, UPV-EHU","institution_ids":["https://openalex.org/I169108374"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056991380","display_name":"Larraitz Uria","orcid":"https://orcid.org/0000-0002-2778-4605"},"institutions":[{"id":"https://openalex.org/I169108374","display_name":"University of the Basque Country","ror":"https://ror.org/000xsnr85","country_code":"ES","type":"education","lineage":["https://openalex.org/I169108374"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Larraitz Uria","raw_affiliation_strings":["IXA taldea, UPV-EHU"],"affiliations":[{"raw_affiliation_string":"IXA taldea, UPV-EHU","institution_ids":["https://openalex.org/I169108374"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001438384","display_name":"Mans Hulden","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mans Hulden","raw_affiliation_strings":["Department of Linguistics University of Colorado"],"affiliations":[{"raw_affiliation_string":"Department of Linguistics University of Colorado","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101652723"],"corresponding_institution_ids":["https://openalex.org/I169108374"],"apc_list":null,"apc_paid":null,"fwci":0.4434,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.79413844,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"100","last_page":"105"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9850999712944031,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/morpheme","display_name":"Morpheme","score":0.9180645942687988},{"id":"https://openalex.org/keywords/phonology","display_name":"Phonology","score":0.8488118052482605},{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.7756285667419434},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7460499405860901},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6536178588867188},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5954445600509644},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.4878513813018799},{"id":"https://openalex.org/keywords/sociology","display_name":"Sociology","score":0.05613124370574951}],"concepts":[{"id":"https://openalex.org/C165297611","wikidata":"https://www.wikidata.org/wiki/Q43249","display_name":"Morpheme","level":2,"score":0.9180645942687988},{"id":"https://openalex.org/C148934300","wikidata":"https://www.wikidata.org/wiki/Q40998","display_name":"Phonology","level":2,"score":0.8488118052482605},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.7756285667419434},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7460499405860901},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6536178588867188},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5954445600509644},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.4878513813018799},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.05613124370574951},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/w16-2112","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w16-2112","pdf_url":"https://www.aclweb.org/anthology/W16-2112.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 10th SIGHUM Workshop on Language Technology for\n          Cultural Heritage, Social Sciences, and Humanities","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/w16-2112","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w16-2112","pdf_url":"https://www.aclweb.org/anthology/W16-2112.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 10th SIGHUM Workshop on Language Technology for\n          Cultural Heritage, Social Sciences, and Humanities","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.6299999952316284,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G7420919289","display_name":null,"funder_award_id":"FEDER","funder_id":"https://openalex.org/F4320321837","funder_display_name":"Ministerio de Econom\u00eda y Competitividad"}],"funders":[{"id":"https://openalex.org/F4320321705","display_name":"Eusko Jaurlaritza","ror":"https://ror.org/00pz2fp31"},{"id":"https://openalex.org/F4320321837","display_name":"Ministerio de Econom\u00eda y Competitividad","ror":"https://ror.org/034900433"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2515412299.pdf","grobid_xml":"https://content.openalex.org/works/W2515412299.grobid-xml"},"referenced_works_count":15,"referenced_works":["https://openalex.org/W8550301","https://openalex.org/W1822791557","https://openalex.org/W1999111115","https://openalex.org/W2091889711","https://openalex.org/W2117621558","https://openalex.org/W2138238723","https://openalex.org/W2169147927","https://openalex.org/W2251463342","https://openalex.org/W2251711535","https://openalex.org/W2271671850","https://openalex.org/W2462051594","https://openalex.org/W2576894420","https://openalex.org/W2610481136","https://openalex.org/W2952097392","https://openalex.org/W3183153947"],"related_works":["https://openalex.org/W2129621419","https://openalex.org/W2612834414","https://openalex.org/W4206056825","https://openalex.org/W291561599","https://openalex.org/W4303859383","https://openalex.org/W1552926714","https://openalex.org/W2081547736","https://openalex.org/W2023873537","https://openalex.org/W4231759894","https://openalex.org/W2018629196"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"a":[3],"proposal":[4],"for":[5,43],"the":[6,30,55],"normalization":[7],"of":[8,24,32,54,57],"word-forms":[9],"in":[10,70],"historical":[11],"texts.":[12],"To":[13],"perform":[14],"this":[15],"task,":[16],"we":[17,36],"extend":[18],"our":[19,38],"previous":[20],"research":[21],"on":[22],"induction":[23,53],"phonology":[25],"and":[26],"adapt":[27],"it":[28],"to":[29,60],"task":[31],"normalization.":[33],"In":[34],"particular,":[35],"combine":[37],"earlier":[39],"models":[40,42],"with":[41],"learning":[44],"morphology":[45],"(without":[46],"additional":[47],"supervision).":[48],"The":[49],"results":[50],"are":[51],"mixed:":[52],"segmentation":[56],"morphemes":[58],"fails":[59],"directly":[61],"offer":[62],"significant":[63],"improvements":[64],"while":[65],"including":[66],"known":[67],"morpheme":[68],"boundaries":[69],"standard":[71],"texts":[72],"do":[73],"improve":[74],"results.":[75]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2026-03-14T08:43:22.919905","created_date":"2025-10-10T00:00:00"}
