{"id":"https://openalex.org/W3025988395","doi":"https://doi.org/10.46298/jdmdh.6485","title":"Corpus and Models for Lemmatisation and POS-tagging of Classical French Theatre","display_name":"Corpus and Models for Lemmatisation and POS-tagging of Classical French Theatre","publication_year":2021,"publication_date":"2021-02-14","ids":{"openalex":"https://openalex.org/W3025988395","doi":"https://doi.org/10.46298/jdmdh.6485","mag":"3025988395"},"language":"en","primary_location":{"id":"doi:10.46298/jdmdh.6485","is_oa":true,"landing_page_url":"https://doi.org/10.46298/jdmdh.6485","pdf_url":"https://jdmdh.episciences.org/7161/pdf","source":{"id":"https://openalex.org/S2736708624","display_name":"Journal of Data Mining & Digital Humanities","issn_l":"2416-5999","issn":["2416-5999"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311340","host_organization_name":"Nicolas Turenne","host_organization_lineage":["https://openalex.org/P4310311340"],"host_organization_lineage_names":["Nicolas Turenne"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Data Mining &amp; Digital Humanities","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://jdmdh.episciences.org/7161/pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Jean-Baptiste Camps","orcid":"https://orcid.org/0000-0003-0385-7037"},"institutions":[{"id":"https://openalex.org/I4210100316","display_name":"\u00c9cole Nationale des Chartes","ror":"https://ror.org/013xvg556","country_code":"FR","type":"education","lineage":["https://openalex.org/I2746051580","https://openalex.org/I4210100316"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Jean-Baptiste Camps","raw_affiliation_strings":["Centre Jean-Mabillon, cole nationale des chartes, Universit Paris, Sciences & Lettres"],"affiliations":[{"raw_affiliation_string":"Centre Jean-Mabillon, cole nationale des chartes, Universit Paris, Sciences & Lettres","institution_ids":["https://openalex.org/I4210100316"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Simon Gabay","orcid":null},"institutions":[{"id":"https://openalex.org/I57825437","display_name":"University of Neuch\u00e2tel","ror":"https://ror.org/00vasag41","country_code":"CH","type":"education","lineage":["https://openalex.org/I57825437"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Simon Gabay","raw_affiliation_strings":["Universit de Neuchtel"],"affiliations":[{"raw_affiliation_string":"Universit de Neuchtel","institution_ids":["https://openalex.org/I57825437"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Paul Fi\u00e8vre","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Paul Fi\u00e8vre","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Thibault Cl\u00e9rice","orcid":"https://orcid.org/0000-0003-1852-9204"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Thibault Cl\u00e9rice","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Florian Cafiero","orcid":"https://orcid.org/0000-0002-1951-6942"},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"funder","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I4210094956","display_name":"Groupe d'\u00c9tude des M\u00e9thodes de l'Analyse Sociologique de la Sorbonne","ror":"https://ror.org/00kzsxx38","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I39804081","https://openalex.org/I4210094956","https://openalex.org/I4210150854"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Florian Cafiero","raw_affiliation_strings":["GEMASS, CNRS / Universit Paris-Sorbonne"],"affiliations":[{"raw_affiliation_string":"GEMASS, CNRS / Universit Paris-Sorbonne","institution_ids":["https://openalex.org/I4210094956","https://openalex.org/I1294671590"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I4210100316"],"apc_list":null,"apc_paid":null,"fwci":0.4192,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.66993243,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":97},"biblio":{"volume":"2021","issue":"Digital humanities in...","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.6823999881744385,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.6823999881744385,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.05559999868273735,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12119","display_name":"Linguistics and Discourse Analysis","score":0.02239999920129776,"subfield":{"id":"https://openalex.org/subfields/1211","display_name":"Philosophy"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.644599974155426},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5116999745368958},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4577000141143799},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.3546999990940094},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.33719998598098755}],"concepts":[{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.644599974155426},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5785999894142151},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5426999926567078},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5116999745368958},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5074999928474426},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4577000141143799},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.3546999990940094},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.3517000079154968},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.33719998598098755},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.33219999074935913},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.2969000041484833},{"id":"https://openalex.org/C532629269","wikidata":"https://www.wikidata.org/wiki/Q865083","display_name":"Corpus linguistics","level":2,"score":0.290800005197525},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.27480000257492065},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.26339998841285706}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.46298/jdmdh.6485","is_oa":true,"landing_page_url":"https://doi.org/10.46298/jdmdh.6485","pdf_url":"https://jdmdh.episciences.org/7161/pdf","source":{"id":"https://openalex.org/S2736708624","display_name":"Journal of Data Mining & Digital Humanities","issn_l":"2416-5999","issn":["2416-5999"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311340","host_organization_name":"Nicolas Turenne","host_organization_lineage":["https://openalex.org/P4310311340"],"host_organization_lineage_names":["Nicolas Turenne"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Data Mining &amp; Digital Humanities","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2005.07505","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2005.07505","pdf_url":"https://arxiv.org/pdf/2005.07505","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:HAL:halshs-02591388v2","is_oa":false,"landing_page_url":"https://shs.hal.science/halshs-02591388","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of Data Mining and Digital Humanities, 2021, &#x27E8;10.46298/jdmdh.6485&#x27E9;","raw_type":"Journal articles"},{"id":"pmh:oai:doaj.org/article:40fdcff9955e49dfb7056d706af4eba2","is_oa":true,"landing_page_url":"https://doaj.org/article/40fdcff9955e49dfb7056d706af4eba2","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of Data Mining and Digital Humanities, Vol 2021, Iss Digital humanities in... (2021)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.46298/jdmdh.6485","is_oa":true,"landing_page_url":"https://doi.org/10.46298/jdmdh.6485","pdf_url":"https://jdmdh.episciences.org/7161/pdf","source":{"id":"https://openalex.org/S2736708624","display_name":"Journal of Data Mining & Digital Humanities","issn_l":"2416-5999","issn":["2416-5999"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311340","host_organization_name":"Nicolas Turenne","host_organization_lineage":["https://openalex.org/P4310311340"],"host_organization_lineage_names":["Nicolas Turenne"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Data Mining &amp; Digital Humanities","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3025988395.pdf","grobid_xml":"https://content.openalex.org/works/W3025988395.grobid-xml"},"referenced_works_count":12,"referenced_works":["https://openalex.org/W254022851","https://openalex.org/W2250471514","https://openalex.org/W2251386579","https://openalex.org/W2296753779","https://openalex.org/W2514031637","https://openalex.org/W2614307741","https://openalex.org/W2950176361","https://openalex.org/W2991260307","https://openalex.org/W3110008479","https://openalex.org/W6893521647","https://openalex.org/W6931406615","https://openalex.org/W6949690843"],"related_works":[],"abstract_inverted_index":{"This":[0],"paper":[1],"describes":[2],"the":[3,36,64,69],"process":[4],"of":[5,47],"building":[6],"an":[7],"annotated":[8],"corpus":[9],"and":[10,22,42,55,72],"training":[11],"models":[12],"for":[13],"classical":[14],"French":[15],"literature,":[16],"with":[17],"a":[18,32,48,56],"focus":[19],"on":[20,52,68],"theatre,":[21],"particularly":[23],"comedies":[24],"in":[25,40],"verse.":[26],"It":[27],"was":[28],"originally":[29],"developed":[30],"as":[31],"preliminary":[33],"step":[34],"to":[35,60,74,81],"stylometric":[37],"analyses":[38],"presented":[39],"Cafiero":[41],"Camps":[43],"[2019].":[44],"The":[45],"use":[46],"recent":[49],"lemmatiser":[50],"based":[51],"neural":[53],"networks":[54],"CRF":[57],"tagger":[58],"allows":[59],"achieve":[61],"accuracies":[62],"beyond":[63],"current":[65],"state-of-the":[66],"art":[67],"in-domain":[70],"test,":[71],"proves":[73],"be":[75],"robust":[76],"during":[77],"out-of-domain":[78],"tests,":[79],"i.e.up":[80],"20th":[82],"c.novels.":[83]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3}],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2020-05-21T00:00:00"}
