{"id":"https://openalex.org/W7128808660","doi":"https://doi.org/10.48550/arxiv.2602.11968","title":"DHPLT: large-scale multilingual diachronic corpora and word representations for semantic change modelling","display_name":"DHPLT: large-scale multilingual diachronic corpora and word representations for semantic change modelling","publication_year":2026,"publication_date":"2026-02-12","ids":{"openalex":"https://openalex.org/W7128808660","doi":"https://doi.org/10.48550/arxiv.2602.11968"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.11968","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125913568","display_name":"Mariia Fedorova","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fedorova, Mariia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071409817","display_name":"Andrey Kutuzov","orcid":"https://orcid.org/0000-0003-2540-5912"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kutuzov, Andrey","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5035353318","display_name":"Khonzoda Umarova","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Umarova, Khonzoda","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12090","display_name":"Language and cultural evolution","score":0.8707000017166138,"subfield":{"id":"https://openalex.org/subfields/3316","display_name":"Cultural Studies"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12090","display_name":"Language and cultural evolution","score":0.8707000017166138,"subfield":{"id":"https://openalex.org/subfields/3316","display_name":"Cultural Studies"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.0430000014603138,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.02199999988079071,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.6632999777793884},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.6227999925613403},{"id":"https://openalex.org/keywords/semantic-change","display_name":"Semantic change","score":0.5680000185966492},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.5350000262260437},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.48190000653266907},{"id":"https://openalex.org/keywords/period","display_name":"Period (music)","score":0.4092999994754791},{"id":"https://openalex.org/keywords/timestamp","display_name":"Timestamp","score":0.38519999384880066},{"id":"https://openalex.org/keywords/dozen","display_name":"Dozen","score":0.33250001072883606}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.78329998254776},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.6632999777793884},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6449999809265137},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6241000294685364},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.6227999925613403},{"id":"https://openalex.org/C36391188","wikidata":"https://www.wikidata.org/wiki/Q1939117","display_name":"Semantic change","level":2,"score":0.5680000185966492},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.5350000262260437},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.48190000653266907},{"id":"https://openalex.org/C2781291010","wikidata":"https://www.wikidata.org/wiki/Q178580","display_name":"Period (music)","level":2,"score":0.4092999994754791},{"id":"https://openalex.org/C113954288","wikidata":"https://www.wikidata.org/wiki/Q186885","display_name":"Timestamp","level":2,"score":0.38519999384880066},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.33809998631477356},{"id":"https://openalex.org/C185181809","wikidata":"https://www.wikidata.org/wiki/Q605704","display_name":"Dozen","level":2,"score":0.33250001072883606},{"id":"https://openalex.org/C175293574","wikidata":"https://www.wikidata.org/wiki/Q697133","display_name":"Word lists by frequency","level":3,"score":0.3163999915122986},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.31380000710487366},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.31349998712539673},{"id":"https://openalex.org/C133462117","wikidata":"https://www.wikidata.org/wiki/Q4929239","display_name":"Data collection","level":2,"score":0.2987000048160553},{"id":"https://openalex.org/C126706616","wikidata":"https://www.wikidata.org/wiki/Q2944660","display_name":"Lexical item","level":2,"score":0.2890999913215637},{"id":"https://openalex.org/C67277372","wikidata":"https://www.wikidata.org/wiki/Q7449085","display_name":"Semantic role labeling","level":3,"score":0.28220000863075256},{"id":"https://openalex.org/C2129575","wikidata":"https://www.wikidata.org/wiki/Q54837","display_name":"Semantic Web","level":2,"score":0.267300009727478},{"id":"https://openalex.org/C2776848632","wikidata":"https://www.wikidata.org/wiki/Q853463","display_name":"Clipping (morphology)","level":2,"score":0.266400009393692},{"id":"https://openalex.org/C69075417","wikidata":"https://www.wikidata.org/wiki/Q515701","display_name":"Linked data","level":3,"score":0.2648000121116638},{"id":"https://openalex.org/C2780403423","wikidata":"https://www.wikidata.org/wiki/Q6537700","display_name":"Lexical database","level":3,"score":0.26089999079704285},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.25780001282691956}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.11968","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.11968","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.11968","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.11968","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"In":[0],"this":[1,132,139],"resource":[2],"paper,":[3],"we":[4,25],"present":[5],"DHPLT,":[6],"an":[7],"open":[8,81],"collection":[9,39],"of":[10,34,106,117,127],"diachronic":[11,108],"corpora":[12,109],"in":[13,102,131,138],"41":[14],"diverse":[15],"languages.":[16],"DHPLT":[17,98],"is":[18],"based":[19],"on":[20],"the":[21,31,76,83,95,103,122,135],"web-crawled":[22],"HPLT":[23],"datasets;":[24],"use":[26],"web":[27],"crawl":[28],"timestamps":[29],"as":[30],"approximate":[32],"signal":[33],"document":[35],"creation":[36],"time.":[37],"The":[38],"covers":[40],"three":[41],"time":[42,52,78],"periods:":[43],"2011-2015,":[44],"2020-2021":[45],"and":[46,63,66],"2024-present":[47],"(1":[48],"million":[49],"documents":[50],"per":[51],"period":[53],"for":[54,69,82,110,124],"each":[55],"language).":[56],"We":[57],"additionally":[58],"provide":[59],"pre-computed":[60],"word":[61],"type":[62],"token":[64],"embeddings":[65],"lexical":[67],"substitutions":[68],"our":[70],"chosen":[71],"target":[72,92],"words,":[73],"while":[74],"at":[75,100,143],"same":[77,96],"leaving":[79],"it":[80],"other":[84],"researchers":[85],"to":[86],"come":[87],"up":[88],"with":[89],"their":[90],"own":[91],"words":[93],"using":[94],"datasets.":[97],"aims":[99],"filling":[101],"current":[104],"lack":[105],"multilingual":[107],"semantic":[111],"change":[112],"modelling":[113],"(beyond":[114],"a":[115,125],"dozen":[116],"high-resource":[118],"languages).":[119],"It":[120],"opens":[121],"way":[123],"variety":[126],"new":[128],"experimental":[129],"setups":[130],"field.":[133],"All":[134],"resources":[136],"described":[137],"paper":[140],"are":[141],"available":[142],"https://data.hplt-project.org/three/diachronic/,":[144],"sorted":[145],"by":[146],"language.":[147]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-02-14T00:00:00"}
