{"id":"https://openalex.org/W4406321622","doi":"https://doi.org/10.1109/access.2025.3528733","title":"Abstractive Summarization of Historical Documents: A New Dataset and Novel Method Using a Domain-Specific Pretrained Model","display_name":"Abstractive Summarization of Historical Documents: A New Dataset and Novel Method Using a Domain-Specific Pretrained Model","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4406321622","doi":"https://doi.org/10.1109/access.2025.3528733"},"language":"en","primary_location":{"id":"doi:10.1109/access.2025.3528733","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2025.3528733","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1109/access.2025.3528733","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5115862962","display_name":"Keerthana Murugaraj","orcid":"https://orcid.org/0009-0008-5100-055X"},"institutions":[{"id":"https://openalex.org/I186903577","display_name":"University of Luxembourg","ror":"https://ror.org/036x5ad56","country_code":"LU","type":"education","lineage":["https://openalex.org/I186903577"]}],"countries":["LU"],"is_corresponding":true,"raw_author_name":"Keerthana Murugaraj","raw_affiliation_strings":["Department of Computer Science, Faculty of Science, Technology and Medicine, University of Luxembourg, Luxembourg City, Luxembourg","Department of Computer Science, Faculty of Science, Technology and Medicine, University of Luxembourg, Luxembourg"],"raw_orcid":"https://orcid.org/0009-0008-5100-055X","affiliations":[{"raw_affiliation_string":"Department of Computer Science, Faculty of Science, Technology and Medicine, University of Luxembourg, Luxembourg City, Luxembourg","institution_ids":["https://openalex.org/I186903577"]},{"raw_affiliation_string":"Department of Computer Science, Faculty of Science, Technology and Medicine, University of Luxembourg, Luxembourg","institution_ids":["https://openalex.org/I186903577"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070022854","display_name":"Salima Lamsiyah","orcid":"https://orcid.org/0000-0001-8789-5713"},"institutions":[{"id":"https://openalex.org/I186903577","display_name":"University of Luxembourg","ror":"https://ror.org/036x5ad56","country_code":"LU","type":"education","lineage":["https://openalex.org/I186903577"]}],"countries":["LU"],"is_corresponding":false,"raw_author_name":"Salima Lamsiyah","raw_affiliation_strings":["Department of Computer Science, Faculty of Science, Technology and Medicine, University of Luxembourg, Luxembourg City, Luxembourg","Department of Computer Science, Faculty of Science, Technology and Medicine, University of Luxembourg, Luxembourg"],"raw_orcid":"https://orcid.org/0000-0001-8789-5713","affiliations":[{"raw_affiliation_string":"Department of Computer Science, Faculty of Science, Technology and Medicine, University of Luxembourg, Luxembourg City, Luxembourg","institution_ids":["https://openalex.org/I186903577"]},{"raw_affiliation_string":"Department of Computer Science, Faculty of Science, Technology and Medicine, University of Luxembourg, Luxembourg","institution_ids":["https://openalex.org/I186903577"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5055092426","display_name":"Christoph Schommer","orcid":"https://orcid.org/0000-0002-0308-7637"},"institutions":[{"id":"https://openalex.org/I186903577","display_name":"University of Luxembourg","ror":"https://ror.org/036x5ad56","country_code":"LU","type":"education","lineage":["https://openalex.org/I186903577"]}],"countries":["LU"],"is_corresponding":false,"raw_author_name":"Christoph Schommer","raw_affiliation_strings":["Department of Computer Science, Faculty of Science, Technology and Medicine, University of Luxembourg, Luxembourg City, Luxembourg","Department of Computer Science, Faculty of Science, Technology and Medicine, University of Luxembourg, Luxembourg"],"raw_orcid":"https://orcid.org/0000-0002-0308-7637","affiliations":[{"raw_affiliation_string":"Department of Computer Science, Faculty of Science, Technology and Medicine, University of Luxembourg, Luxembourg City, Luxembourg","institution_ids":["https://openalex.org/I186903577"]},{"raw_affiliation_string":"Department of Computer Science, Faculty of Science, Technology and Medicine, University of Luxembourg, Luxembourg","institution_ids":["https://openalex.org/I186903577"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5115862962"],"corresponding_institution_ids":["https://openalex.org/I186903577"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":13.34,"has_fulltext":true,"cited_by_count":7,"citation_normalized_percentile":{"value":0.98327659,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":"13","issue":null,"first_page":"10918","last_page":"10932"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.978600025177002,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.978600025177002,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.972599983215332,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9077000021934509,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/automatic-summarization","display_name":"Automatic summarization","score":0.9331560134887695},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8240681290626526},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5768948793411255},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5116081833839417},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5077370405197144},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4764452576637268},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.05524533987045288}],"concepts":[{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.9331560134887695},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8240681290626526},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5768948793411255},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5116081833839417},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5077370405197144},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4764452576637268},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.05524533987045288},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/access.2025.3528733","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2025.3528733","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:orbilu.uni.lu:10993/63750","is_oa":true,"landing_page_url":"https://orbilu.uni.lu/handle/10993/63750","pdf_url":"https://orbilu.uni.lu/bitstream/10993/63750/1/Published_IEEE.pdf","source":{"id":"https://openalex.org/S4306401815","display_name":"Open Repository and Bibliography (University of Luxembourg)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I186903577","host_organization_name":"University of Luxembourg","host_organization_lineage":["https://openalex.org/I186903577"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, 13, 10918-10932 (2025)","raw_type":"peer reviewed"},{"id":"pmh:oai:doaj.org/article:0ca324fb59af43dd8daa6b9079617407","is_oa":true,"landing_page_url":"https://doaj.org/article/0ca324fb59af43dd8daa6b9079617407","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 13, Pp 10918-10932 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2025.3528733","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2025.3528733","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":57,"referenced_works":["https://openalex.org/W176323200","https://openalex.org/W1535000235","https://openalex.org/W1967082914","https://openalex.org/W1985697096","https://openalex.org/W2007407316","https://openalex.org/W2081056190","https://openalex.org/W2123442489","https://openalex.org/W2202223467","https://openalex.org/W2888482885","https://openalex.org/W2911160197","https://openalex.org/W2923446382","https://openalex.org/W2955906716","https://openalex.org/W2962965405","https://openalex.org/W2963837895","https://openalex.org/W2963926728","https://openalex.org/W2963929190","https://openalex.org/W2964028111","https://openalex.org/W2964325543","https://openalex.org/W2964656262","https://openalex.org/W2964813902","https://openalex.org/W2970419734","https://openalex.org/W2970641574","https://openalex.org/W3023360076","https://openalex.org/W3035022648","https://openalex.org/W3042185737","https://openalex.org/W3132939488","https://openalex.org/W3153444823","https://openalex.org/W3175779740","https://openalex.org/W3192478068","https://openalex.org/W3199662997","https://openalex.org/W3211758383","https://openalex.org/W3215701899","https://openalex.org/W4205480693","https://openalex.org/W4210706177","https://openalex.org/W4220828618","https://openalex.org/W4378420555","https://openalex.org/W4385245566","https://openalex.org/W4386074468","https://openalex.org/W4391494845","https://openalex.org/W4404784041","https://openalex.org/W6605399267","https://openalex.org/W6675246850","https://openalex.org/W6682631176","https://openalex.org/W6727690538","https://openalex.org/W6731795565","https://openalex.org/W6737479944","https://openalex.org/W6745050457","https://openalex.org/W6755207826","https://openalex.org/W6769627184","https://openalex.org/W6771915120","https://openalex.org/W6776048684","https://openalex.org/W6810311456","https://openalex.org/W6811165306","https://openalex.org/W6811207682","https://openalex.org/W6857673048","https://openalex.org/W6862396702","https://openalex.org/W6869568491"],"related_works":["https://openalex.org/W2366403280","https://openalex.org/W1495108544","https://openalex.org/W2091301346","https://openalex.org/W3148229873","https://openalex.org/W4389760904","https://openalex.org/W2150160875","https://openalex.org/W4242223894","https://openalex.org/W4306886878","https://openalex.org/W2973759123","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Automatic":[0],"Text":[1],"Summarization":[2],"(ATS)":[3],"systems":[4],"aim":[5],"to":[6,125,197],"generate":[7],"concise":[8],"summaries":[9],"of":[10,37,66,99,106,118,129,205,215],"documents":[11,84],"while":[12],"preserving":[13],"their":[14],"essential":[15],"aspects":[16],"using":[17,82],"either":[18],"extractive":[19],"or":[20],"abstractive":[21,53,224],"approaches.":[22],"Transformer-based":[23],"ATS":[24],"methods":[25,192],"have":[26],"achieved":[27],"success":[28],"in":[29,39,60,200,203],"various":[30],"domains;":[31],"however,":[32],"there":[33],"is":[34,63,219],"a":[35,49,79,108,146,165],"lack":[36,65],"research":[38],"the":[40,64,87,97,104,115,127,130,141,153,156,160,213,220],"historical":[41,54,70,225],"domain.":[42],"In":[43],"this":[44,61,75,218],"paper,":[45],"we":[46,77,102,163],"introduce":[47],"HistBERTSum-Abs,":[48],"novel":[50,166],"method":[51,135,186],"for":[52,69,173],"single-document":[55],"summarization.":[56,72,227],"A":[57],"major":[58],"challenge":[59],"task":[62],"annotated":[67],"datasets":[68],"text":[71,226],"To":[73,151,212],"address":[74,152],"issue,":[76],"create":[78],"new":[80],"dataset":[81,181],"archived":[83],"obtained":[85],"from":[86],"Centre":[88],"Virtuel":[89],"de":[90],"la":[91],"Connaissance":[92],"sur":[93],"l\u2019Europe":[94],"group":[95],"at":[96],"University":[98],"Luxembourg.":[100],"Furthermore,":[101],"leverage":[103],"potential":[105],"HistBERT,":[107],"domain-specific":[109],"bidirectional":[110],"language":[111],"model":[112],"trained":[113],"on":[114,178,223],"balanced":[116],"Corpus":[117],"Historical":[119],"American":[120],"English,":[121],"(<uri":[122],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[123],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">https://www.english-corpora.org/coha/</uri>)":[124],"capture":[126],"semantics":[128],"input":[131],"documents.":[132],"Specifically,":[133],"our":[134,179,184,216],"adopts":[136],"an":[137],"encoder-decoder":[138],"architecture,":[139],"combining":[140],"pre-trained":[142,157],"HistBERT":[143],"encoder":[144,158],"with":[145],"randomly":[147],"initialized":[148],"Transformer":[149],"decoder.":[150],"mismatch":[154],"between":[155],"and":[159,193,208],"non-pre-trained":[161],"decoder,":[162],"employ":[164],"fine-tuning":[167],"schedule":[168],"that":[169,183],"uses":[170],"different":[171],"optimizers":[172],"each":[174],"component.":[175],"Experimental":[176],"results":[177,195],"constructed":[180],"demonstrate":[182],"HistBERTSum-Abs":[185],"outperforms":[187],"recent":[188],"state-of-the-art":[189,198],"deep":[190],"learning-based":[191],"achieves":[194],"comparable":[196],"LLMs":[199],"zero-shot":[201],"settings":[202],"terms":[204],"ROUGE-1,":[206],"ROUGE-2,":[207],"ROUGE-L":[209],"F1":[210],"scores.":[211],"best":[214],"knowledge,":[217],"first":[221],"work":[222]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":6}],"updated_date":"2026-05-23T08:51:43.019350","created_date":"2025-10-10T00:00:00"}
