{"id":"https://openalex.org/W7140089395","doi":"https://doi.org/10.18653/v1/2026.eacl-long.123","title":"PortOldBERT: Portuguese Historical Language Models","display_name":"PortOldBERT: Portuguese Historical Language Models","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W7140089395","doi":"https://doi.org/10.18653/v1/2026.eacl-long.123"},"language":null,"primary_location":{"id":"doi:10.18653/v1/2026.eacl-long.123","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2026.eacl-long.123","pdf_url":"https://aclanthology.org/2026.eacl-long.123.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2026.eacl-long.123.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5107159659","display_name":"Tom\u00e1s Freitas Os\u00f3rio","orcid":"https://orcid.org/0009-0001-2036-3197"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tomas Freitas Osorio","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5035645484","display_name":"Henrique Lopes Cardoso","orcid":"https://orcid.org/0000-0003-1252-7515"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Henrique Lopes Cardoso","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.36679678,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"2691","last_page":"2705"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12377","display_name":"Digital Humanities and Scholarship","score":0.1842000037431717,"subfield":{"id":"https://openalex.org/subfields/1208","display_name":"Literature and Literary Theory"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12377","display_name":"Digital Humanities and Scholarship","score":0.1842000037431717,"subfield":{"id":"https://openalex.org/subfields/1208","display_name":"Literature and Literary Theory"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12090","display_name":"Language and cultural evolution","score":0.0560000017285347,"subfield":{"id":"https://openalex.org/subfields/3316","display_name":"Cultural Studies"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.04879999905824661,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/portuguese","display_name":"Portuguese","score":0.7305999994277954},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5307000279426575},{"id":"https://openalex.org/keywords/historical-linguistics","display_name":"Historical linguistics","score":0.5256999731063843},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5171999931335449},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.44760000705718994},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.4442000091075897}],"concepts":[{"id":"https://openalex.org/C35219183","wikidata":"https://www.wikidata.org/wiki/Q5146","display_name":"Portuguese","level":2,"score":0.7305999994277954},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6194999814033508},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.5974000096321106},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5307000279426575},{"id":"https://openalex.org/C181925144","wikidata":"https://www.wikidata.org/wiki/Q190375","display_name":"Historical linguistics","level":2,"score":0.5256999731063843},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5171999931335449},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5040000081062317},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48500001430511475},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.44760000705718994},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.4442000091075897},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.421999990940094},{"id":"https://openalex.org/C2778880076","wikidata":"https://www.wikidata.org/wiki/Q750553","display_name":"Brazilian Portuguese","level":3,"score":0.3025999963283539},{"id":"https://openalex.org/C204160518","wikidata":"https://www.wikidata.org/wiki/Q122653","display_name":"Numeral system","level":2,"score":0.27570000290870667},{"id":"https://openalex.org/C3018824978","wikidata":"https://www.wikidata.org/wiki/Q2894891","display_name":"Error analysis","level":2,"score":0.2754000127315521},{"id":"https://openalex.org/C139228662","wikidata":"https://www.wikidata.org/wiki/Q5155640","display_name":"Comparative historical research","level":2,"score":0.27059999108314514},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.26579999923706055},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.2614000141620636}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2026.eacl-long.123","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2026.eacl-long.123","pdf_url":"https://aclanthology.org/2026.eacl-long.123.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2026.eacl-long.123","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2026.eacl-long.123","pdf_url":"https://aclanthology.org/2026.eacl-long.123.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.6711567044258118,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320334779","display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia","ror":"https://ror.org/00snfqn58"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7140089395.pdf","grobid_xml":"https://content.openalex.org/works/W7140089395.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Historical":[0],"language":[1,60],"models":[2],"play":[3],"a":[4],"crucial":[5],"role":[6],"in":[7,100],"the":[8,38,55,71,114],"study":[9],"of":[10,40],"languages,":[11],"and":[12,24,89],"can":[13],"benefit":[14],"tasks":[15],"such":[16,44],"as":[17],"named-entity":[18],"recognition":[19],"(NER),":[20],"partof-speech":[21],"(PoS)":[22],"tagging,":[23,83],"post-OCR":[25],"correction,":[26],"among":[27],"others.Despite":[28],"their":[29],"relevance,":[30],"most":[31],"efforts":[32],"have":[33],"been":[34],"concentrated":[35],"on":[36,73],"English.To":[37],"best":[39],"our":[41],"knowledge,":[42],"no":[43],"model":[45],"exists":[46],"for":[47,93],"historical":[48,57,95,101,109],"Portuguese.In":[49],"this":[50],"work,":[51],"we":[52],"introduce":[53],"PortOldBERT,":[54],"first":[56],"Portuguese":[58],"encoder":[59,72],"model.We":[61],"demonstrate":[62],"its":[63,104],"usefulness":[64],"by":[65],"comparing":[66],"PortOldBERT's":[67],"performance":[68],"with":[69],"Albertina,":[70],"which":[74],"it":[75],"is":[76],"based,":[77],"across":[78],"multiple":[79],"tasks-pseudo-perplexity,":[80],"NER,":[81],"PoS":[82],"word":[84],"error":[85,91],"rate":[86],"(WER)":[87],"prediction,":[88],"OCR":[90],"detection-and":[92],"different":[94],"periods.PortOldBERT":[96],"consistently":[97],"outperforms":[98],"Albertina":[99],"data,":[102],"demonstrating":[103],"ability":[105,115],"to":[106,116],"effectively":[107],"integrate":[108],"linguistic":[110],"contexts":[111],"while":[112],"retaining":[113],"process":[117],"contemporary":[118],"text.":[119]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-24T00:00:00"}
