{"id":"https://openalex.org/W4408403679","doi":"https://doi.org/10.1145/3677389.3702578","title":"Seventeenth-Century Spanish American Notary Records for Fine-Tuning Spanish Large Language Models","display_name":"Seventeenth-Century Spanish American Notary Records for Fine-Tuning Spanish Large Language Models","publication_year":2024,"publication_date":"2024-12-16","ids":{"openalex":"https://openalex.org/W4408403679","doi":"https://doi.org/10.1145/3677389.3702578"},"language":"en","primary_location":{"id":"doi:10.1145/3677389.3702578","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3677389.3702578","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 24th ACM/IEEE Joint Conference on Digital Libraries","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5062425611","display_name":"Shraboni Sarker","orcid":"https://orcid.org/0009-0009-1513-7521"},"institutions":[{"id":"https://openalex.org/I76835614","display_name":"University of Missouri","ror":"https://ror.org/02ymw8z06","country_code":"US","type":"education","lineage":["https://openalex.org/I76835614"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Shraboni Sarker","raw_affiliation_strings":["University of Missouri, Columbia, MO, USA"],"raw_orcid":"https://orcid.org/0009-0009-1513-7521","affiliations":[{"raw_affiliation_string":"University of Missouri, Columbia, MO, USA","institution_ids":["https://openalex.org/I76835614"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111226565","display_name":"Ahmad Tamim Hamad","orcid":null},"institutions":[{"id":"https://openalex.org/I76835614","display_name":"University of Missouri","ror":"https://ror.org/02ymw8z06","country_code":"US","type":"education","lineage":["https://openalex.org/I76835614"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ahmad Tamim Hamad","raw_affiliation_strings":["The University of Missouri, Columbia, USA"],"raw_orcid":"https://orcid.org/0009-0003-2483-1626","affiliations":[{"raw_affiliation_string":"The University of Missouri, Columbia, USA","institution_ids":["https://openalex.org/I76835614"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5099096784","display_name":"Hulayyil Alshammari","orcid":null},"institutions":[{"id":"https://openalex.org/I76835614","display_name":"University of Missouri","ror":"https://ror.org/02ymw8z06","country_code":"US","type":"education","lineage":["https://openalex.org/I76835614"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hulayyil Alshammari","raw_affiliation_strings":["The University of Missouri, Columbia, USA"],"raw_orcid":"https://orcid.org/0009-0003-8012-3501","affiliations":[{"raw_affiliation_string":"The University of Missouri, Columbia, USA","institution_ids":["https://openalex.org/I76835614"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051614422","display_name":"Viviana L. Grieco","orcid":"https://orcid.org/0000-0002-6350-6995"},"institutions":[{"id":"https://openalex.org/I75421653","display_name":"University of Missouri\u2013Kansas City","ror":"https://ror.org/01w0d5g70","country_code":"US","type":"education","lineage":["https://openalex.org/I75421653"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Viviana Grieco","raw_affiliation_strings":["University of Missouri-Kansas City, Columbia, MO, USA"],"raw_orcid":"https://orcid.org/0000-0002-6350-6995","affiliations":[{"raw_affiliation_string":"University of Missouri-Kansas City, Columbia, MO, USA","institution_ids":["https://openalex.org/I75421653"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5087950601","display_name":"Praveen Rao","orcid":"https://orcid.org/0000-0002-1859-0438"},"institutions":[{"id":"https://openalex.org/I76835614","display_name":"University of Missouri","ror":"https://ror.org/02ymw8z06","country_code":"US","type":"education","lineage":["https://openalex.org/I76835614"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Praveen Rao","raw_affiliation_strings":["Electrical Engineering and Computer Science, The University of Missouri, Columbia, USA"],"raw_orcid":"https://orcid.org/0000-0002-1859-0438","affiliations":[{"raw_affiliation_string":"Electrical Engineering and Computer Science, The University of Missouri, Columbia, USA","institution_ids":["https://openalex.org/I76835614"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5062425611"],"corresponding_institution_ids":["https://openalex.org/I76835614"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.48719731,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12341","display_name":"Spanish Linguistics and Language Studies","score":0.9700999855995178,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12341","display_name":"Spanish Linguistics and Language Studies","score":0.9700999855995178,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T14442","display_name":"Historical Studies in Science","score":0.9573000073432922,"subfield":{"id":"https://openalex.org/subfields/1207","display_name":"History and Philosophy of Science"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13156","display_name":"Historical Linguistics and Language Studies","score":0.9570000171661377,"subfield":{"id":"https://openalex.org/subfields/1203","display_name":"Language and Linguistics"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4943089485168457},{"id":"https://openalex.org/keywords/history","display_name":"History","score":0.3934519290924072}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4943089485168457},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.3934519290924072}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3677389.3702578","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3677389.3702578","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 24th ACM/IEEE Joint Conference on Digital Libraries","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W1976267225","https://openalex.org/W2612690371","https://openalex.org/W2741609678","https://openalex.org/W2896457183","https://openalex.org/W2952638691","https://openalex.org/W3092694106","https://openalex.org/W3206264130","https://openalex.org/W4245031736","https://openalex.org/W4388676658","https://openalex.org/W4390118501","https://openalex.org/W4390479058"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Large":[0],"language":[1,50,61,158],"models":[2,165],"(LLMs)":[3],"have":[4],"gained":[5],"tremendous":[6],"popularity":[7],"in":[8,133,179],"domains":[9],"such":[10,57,153],"as":[11,58,127,154],"ecommerce,":[12],"finance,":[13],"healthcare,":[14],"and":[15,64,94,113,156,160,166,184],"education.":[16],"Fine-tuning":[17],"is":[18,68,125,185],"a":[19,27,31,40,53,69,89],"common":[20],"approach":[21],"to":[22,51,147],"customize":[23],"an":[24,172],"LLM":[25],"on":[26],"domain-specific":[28],"dataset":[29,42,67,143,169],"for":[30,43,47,151,175],"desired":[32],"downstream":[33],"task.":[34],"In":[35],"this":[36],"paper,":[37],"we":[38,139],"present":[39],"valuable":[41],"fine-tuning":[44],"LLMs":[45,150,183],"developed":[46],"the":[48,76,81,180],"Spanish":[49,149,164],"perform":[52],"variety":[54],"of":[55,71,84,91,99,182],"tasks":[56,152],"classification,":[59],"masked":[60,157],"modeling,":[62,159],"clustering,":[63],"others.":[65],"Our":[66,123,168],"collection":[70,87],"handwritten":[72,104],"notary":[73],"records":[74],"from":[75,80],"seventeenth":[77],"century":[78],"obtained":[79],"National":[82],"Archives":[83],"Argentina.":[85],"This":[86],"contains":[88],"combination":[90],"original":[92],"images":[93],"transcribed":[95],"text":[96,177],"(and":[97],"metadata)":[98],"160+":[100],"pages":[101],"that":[102,141],"were":[103],"by":[105,131],"two":[106],"notaries,":[107],"namely,":[108],"Estenban":[109],"Agreda":[110],"de":[111,115],"Vergara":[112],"Nicolas":[114],"Valdivia":[116],"y":[117],"Brisuela":[118],"nearly":[119],"400":[120],"years":[121],"ago.":[122],"transcription":[124],"accurate":[126],"it":[128],"was":[129],"prepared":[130],"experts":[132],"17th-century":[134],"Spanish.":[135],"Through":[136],"empirical":[137],"evaluation,":[138],"demonstrate":[140],"our":[142],"can":[144,161],"be":[145,171],"used":[146],"fine-tune":[148],"classification":[155],"outperform":[162],"pretrained":[163],"ChatGPT-3.5/ChatGPT-4o.":[167],"will":[170],"invaluable":[173],"resource":[174],"historical":[176],"analysis":[178],"era":[181],"available":[186],"via":[187],"GitHub":[188],"at":[189],"https://github.com/raopr/SpanishNotaryCollection.":[190]},"counts_by_year":[],"updated_date":"2025-12-28T23:10:05.387466","created_date":"2025-10-10T00:00:00"}
