{"id":"https://openalex.org/W2927428614","doi":"https://doi.org/10.1017/s1351324918000505","title":"Weighted finite-state transducers for normalization of historical texts","display_name":"Weighted finite-state transducers for normalization of historical texts","publication_year":2019,"publication_date":"2019-03-01","ids":{"openalex":"https://openalex.org/W2927428614","doi":"https://doi.org/10.1017/s1351324918000505","mag":"2927428614"},"language":"en","primary_location":{"id":"doi:10.1017/s1351324918000505","is_oa":true,"landing_page_url":"https://doi.org/10.1017/s1351324918000505","pdf_url":"https://www.cambridge.org/core/services/aop-cambridge-core/content/view/0743ABEDB49551A48BD5985833C4EC69/S1351324918000505a.pdf/div-class-title-weighted-finite-state-transducers-for-normalization-of-historical-texts-div.pdf","source":{"id":"https://openalex.org/S18088403","display_name":"Natural Language Engineering","issn_l":"1351-3249","issn":["1351-3249","1469-8110"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Natural Language Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://www.cambridge.org/core/services/aop-cambridge-core/content/view/0743ABEDB49551A48BD5985833C4EC69/S1351324918000505a.pdf/div-class-title-weighted-finite-state-transducers-for-normalization-of-historical-texts-div.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101652723","display_name":"Izaskun Etxeberria","orcid":"https://orcid.org/0000-0002-8251-7411"},"institutions":[{"id":"https://openalex.org/I169108374","display_name":"University of the Basque Country","ror":"https://ror.org/000xsnr85","country_code":"ES","type":"education","lineage":["https://openalex.org/I169108374"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Izaskun Etxeberria","raw_affiliation_strings":["IXA Group, University of the Basque Country, Donostia-San Sebasti\u00e1n, Spain"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IXA Group, University of the Basque Country, Donostia-San Sebasti\u00e1n, Spain","institution_ids":["https://openalex.org/I169108374"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027820865","display_name":"I\u00f1aki Alegria","orcid":"https://orcid.org/0000-0002-0272-1472"},"institutions":[{"id":"https://openalex.org/I169108374","display_name":"University of the Basque Country","ror":"https://ror.org/000xsnr85","country_code":"ES","type":"education","lineage":["https://openalex.org/I169108374"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"I\u00f1aki Alegria","raw_affiliation_strings":["IXA Group, University of the Basque Country, Donostia-San Sebasti\u00e1n, Spain"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IXA Group, University of the Basque Country, Donostia-San Sebasti\u00e1n, Spain","institution_ids":["https://openalex.org/I169108374"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056991380","display_name":"Larraitz Uria","orcid":"https://orcid.org/0000-0002-2778-4605"},"institutions":[{"id":"https://openalex.org/I169108374","display_name":"University of the Basque Country","ror":"https://ror.org/000xsnr85","country_code":"ES","type":"education","lineage":["https://openalex.org/I169108374"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Larraitz Uria","raw_affiliation_strings":["IXA Group, University of the Basque Country, Donostia-San Sebasti\u00e1n, Spain"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IXA Group, University of the Basque Country, Donostia-San Sebasti\u00e1n, Spain","institution_ids":["https://openalex.org/I169108374"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101652723"],"corresponding_institution_ids":["https://openalex.org/I169108374"],"apc_list":null,"apc_paid":null,"fwci":0.5803,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.74859491,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"25","issue":"2","first_page":"307","last_page":"321"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9087094664573669},{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.8466709852218628},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6422470808029175},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.582116425037384},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5478817820549011},{"id":"https://openalex.org/keywords/finite-state","display_name":"Finite state","score":0.483547180891037},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.46666234731674194},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.2347712218761444},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.16518187522888184}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9087094664573669},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.8466709852218628},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6422470808029175},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.582116425037384},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5478817820549011},{"id":"https://openalex.org/C2983497884","wikidata":"https://www.wikidata.org/wiki/Q176452","display_name":"Finite state","level":3,"score":0.483547180891037},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.46666234731674194},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2347712218761444},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.16518187522888184},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1017/s1351324918000505","is_oa":true,"landing_page_url":"https://doi.org/10.1017/s1351324918000505","pdf_url":"https://www.cambridge.org/core/services/aop-cambridge-core/content/view/0743ABEDB49551A48BD5985833C4EC69/S1351324918000505a.pdf/div-class-title-weighted-finite-state-transducers-for-normalization-of-historical-texts-div.pdf","source":{"id":"https://openalex.org/S18088403","display_name":"Natural Language Engineering","issn_l":"1351-3249","issn":["1351-3249","1469-8110"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Natural Language Engineering","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1017/s1351324918000505","is_oa":true,"landing_page_url":"https://doi.org/10.1017/s1351324918000505","pdf_url":"https://www.cambridge.org/core/services/aop-cambridge-core/content/view/0743ABEDB49551A48BD5985833C4EC69/S1351324918000505a.pdf/div-class-title-weighted-finite-state-transducers-for-normalization-of-historical-texts-div.pdf","source":{"id":"https://openalex.org/S18088403","display_name":"Natural Language Engineering","issn_l":"1351-3249","issn":["1351-3249","1469-8110"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Natural Language Engineering","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.8600000143051147,"display_name":"Quality Education"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321837","display_name":"Ministerio de Econom\u00eda y Competitividad","ror":"https://ror.org/034900433"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":44,"referenced_works":["https://openalex.org/W8550301","https://openalex.org/W123309796","https://openalex.org/W417337976","https://openalex.org/W1502161415","https://openalex.org/W1561949675","https://openalex.org/W1582482241","https://openalex.org/W1585951665","https://openalex.org/W1822791557","https://openalex.org/W1873914511","https://openalex.org/W1987902506","https://openalex.org/W1999111115","https://openalex.org/W2013304636","https://openalex.org/W2014611589","https://openalex.org/W2075123920","https://openalex.org/W2091889711","https://openalex.org/W2118947254","https://openalex.org/W2133666967","https://openalex.org/W2143092851","https://openalex.org/W2243751648","https://openalex.org/W2250875199","https://openalex.org/W2251463342","https://openalex.org/W2251711535","https://openalex.org/W2262393948","https://openalex.org/W2266080033","https://openalex.org/W2271671850","https://openalex.org/W2294413629","https://openalex.org/W2462051594","https://openalex.org/W2483327705","https://openalex.org/W2515412299","https://openalex.org/W2524643686","https://openalex.org/W2529518929","https://openalex.org/W2531638282","https://openalex.org/W2576894420","https://openalex.org/W2603442068","https://openalex.org/W2624926655","https://openalex.org/W2765448416","https://openalex.org/W2912359492","https://openalex.org/W2914151435","https://openalex.org/W2963805136","https://openalex.org/W2982630078","https://openalex.org/W3024625086","https://openalex.org/W4210706177","https://openalex.org/W4250944796","https://openalex.org/W6629863463"],"related_works":["https://openalex.org/W2953716828","https://openalex.org/W2904857019","https://openalex.org/W2944728705","https://openalex.org/W3011538607","https://openalex.org/W2904022177","https://openalex.org/W2591697403","https://openalex.org/W4321441197","https://openalex.org/W2359348847","https://openalex.org/W4294432981","https://openalex.org/W4321276295"],"abstract_inverted_index":{"Abstract":[0],"This":[1],"paper":[2],"presents":[3],"a":[4,78,84,90],"study":[5],"about":[6],"methods":[7,17],"for":[8,34,109,122],"normalization":[9],"of":[10,15,51,93,120,130,145],"historical":[11,22],"texts.":[12],"The":[13,101],"aim":[14],"these":[16],"is":[18,64,83],"learning":[19],"relations":[20],"between":[21],"and":[23,31,37,39,54,136],"contemporary":[24],"word":[25],"forms.":[26],"We":[27],"have":[28,41],"compiled":[29,102],"training":[30,131],"test":[32],"corpora":[33,53,103],"different":[35],"languages":[36],"scenarios,":[38],"we":[40,125],"tried":[42],"to":[43,48,66,73,97,106,114,141],"read":[44],"the":[45,49,52,118,123,128,134,143,146],"results":[46,135],"related":[47],"features":[50],"languages.":[55],"Our":[56,70],"proposed":[57],"method,":[58],"based":[59],"on":[60],"weighted":[61],"finite-state":[62],"transducers,":[63],"compared":[65],"previously":[67],"published":[68],"ones.":[69],"method":[71],"learns":[72],"map":[74],"phonological":[75],"changes":[76],"using":[77],"noisy":[79],"channel":[80],"model;":[81],"it":[82],"simple":[85],"solution":[86],"that":[87],"can":[88],"use":[89],"limited":[91],"amount":[92,119],"supervision":[94,121],"in":[95,112],"order":[96,113],"achieve":[98],"adequate":[99],"performance.":[100],"are":[104],"ready":[105],"be":[107],"used":[108],"other":[110],"researchers":[111],"compare":[115],"results.":[116],"Concerning":[117],"task,":[124],"investigate":[126],"how":[127],"size":[129],"corpus":[132],"affects":[133],"identify":[137],"some":[138],"interesting":[139],"factors":[140],"anticipate":[142],"difficulty":[144],"task.":[147]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":1}],"updated_date":"2026-05-22T06:13:13.366637","created_date":"2025-10-10T00:00:00"}
