{"id":"https://openalex.org/W4389757577","doi":"https://doi.org/10.46298/jdmdh.12689","title":"SegmOnto: A Controlled Vocabulary to Describe and Process Digital Facsimiles","display_name":"SegmOnto: A Controlled Vocabulary to Describe and Process Digital Facsimiles","publication_year":2024,"publication_date":"2024-12-17","ids":{"openalex":"https://openalex.org/W4389757577","doi":"https://doi.org/10.46298/jdmdh.12689"},"language":"en","primary_location":{"id":"doi:10.46298/jdmdh.12689","is_oa":true,"landing_page_url":"https://doi.org/10.46298/jdmdh.12689","pdf_url":"https://jdmdh.episciences.org/14953/pdf","source":{"id":"https://openalex.org/S2736708624","display_name":"Journal of Data Mining & Digital Humanities","issn_l":"2416-5999","issn":["2416-5999"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311340","host_organization_name":"Nicolas Turenne","host_organization_lineage":["https://openalex.org/P4310311340"],"host_organization_lineage_names":["Nicolas Turenne"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Data Mining &amp; Digital Humanities","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://jdmdh.episciences.org/14953/pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5083756330","display_name":"Simon Gabay","orcid":"https://orcid.org/0000-0001-9094-4475"},"institutions":[{"id":"https://openalex.org/I114457229","display_name":"University of Geneva","ror":"https://ror.org/01swzsf04","country_code":"CH","type":"education","lineage":["https://openalex.org/I114457229"]}],"countries":["CH"],"is_corresponding":true,"raw_author_name":"Simon Gabay","raw_affiliation_strings":["University of Geneva","Universit\u00e9 de Gen\u00e8ve = University of Geneva"],"affiliations":[{"raw_affiliation_string":"University of Geneva","institution_ids":["https://openalex.org/I114457229"]},{"raw_affiliation_string":"Universit\u00e9 de Gen\u00e8ve = University of Geneva","institution_ids":["https://openalex.org/I114457229"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090258074","display_name":"Ariane Pinche","orcid":"https://orcid.org/0000-0002-7843-5050"},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Ariane Pinche","raw_affiliation_strings":["Centre National de la Recherche Scientifique","Histoire, Arch\u00e9ologie et Litt\u00e9ratures des mondes chr\u00e9tiens et musulmans m\u00e9di\u00e9vaux"],"affiliations":[{"raw_affiliation_string":"Centre National de la Recherche Scientifique","institution_ids":["https://openalex.org/I1294671590"]},{"raw_affiliation_string":"Histoire, Arch\u00e9ologie et Litt\u00e9ratures des mondes chr\u00e9tiens et musulmans m\u00e9di\u00e9vaux","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071217511","display_name":"Kelly Christensen","orcid":"https://orcid.org/0000-0002-7236-874X"},"institutions":[{"id":"https://openalex.org/I205092303","display_name":"Institut d'Etudes Politiques de Paris","ror":"https://ror.org/05fe7ax82","country_code":"FR","type":"facility","lineage":["https://openalex.org/I205092303"]},{"id":"https://openalex.org/I4210100316","display_name":"\u00c9cole Nationale des Chartes","ror":"https://ror.org/013xvg556","country_code":"FR","type":"education","lineage":["https://openalex.org/I2746051580","https://openalex.org/I4210100316"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Kelly Christensen","raw_affiliation_strings":["Sciences Po","Universit\u00e9 Paris Sciences et Lettres","\u00c9cole nationale des chartes"],"affiliations":[{"raw_affiliation_string":"Sciences Po","institution_ids":["https://openalex.org/I205092303"]},{"raw_affiliation_string":"Universit\u00e9 Paris Sciences et Lettres","institution_ids":[]},{"raw_affiliation_string":"\u00c9cole nationale des chartes","institution_ids":["https://openalex.org/I4210100316"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5078767816","display_name":"Jean-Baptiste Camps","orcid":"https://orcid.org/0000-0003-0385-7037"},"institutions":[{"id":"https://openalex.org/I4210100316","display_name":"\u00c9cole Nationale des Chartes","ror":"https://ror.org/013xvg556","country_code":"FR","type":"education","lineage":["https://openalex.org/I2746051580","https://openalex.org/I4210100316"]},{"id":"https://openalex.org/I4210102894","display_name":"Clinique Saint Jean","ror":"https://ror.org/01dd1x730","country_code":"BE","type":"healthcare","lineage":["https://openalex.org/I4210102894"]}],"countries":["BE","FR"],"is_corresponding":false,"raw_author_name":"Jean-Baptiste Camps","raw_affiliation_strings":["Centre Jean Mabillon","Universit\u00e9 Paris Sciences et Lettres","\u00c9cole nationale des chartes"],"affiliations":[{"raw_affiliation_string":"Centre Jean Mabillon","institution_ids":["https://openalex.org/I4210102894"]},{"raw_affiliation_string":"Universit\u00e9 Paris Sciences et Lettres","institution_ids":[]},{"raw_affiliation_string":"\u00c9cole nationale des chartes","institution_ids":["https://openalex.org/I4210100316"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5083756330"],"corresponding_institution_ids":["https://openalex.org/I114457229"],"apc_list":null,"apc_paid":null,"fwci":15.5668,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.98285939,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12377","display_name":"Digital Humanities and Scholarship","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1208","display_name":"Literature and Literary Theory"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12377","display_name":"Digital Humanities and Scholarship","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1208","display_name":"Literature and Literary Theory"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9927999973297119,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13523","display_name":"Mathematics, Computing, and Information Processing","score":0.9905999898910522,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8448047637939453},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6705572605133057},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.6327583193778992},{"id":"https://openalex.org/keywords/facsimile","display_name":"Facsimile","score":0.5889207720756531},{"id":"https://openalex.org/keywords/xml","display_name":"XML","score":0.5846419930458069},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.5710009932518005},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5152009129524231},{"id":"https://openalex.org/keywords/document-structure-description","display_name":"Document Structure Description","score":0.4927885830402374},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.4846706688404083},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4681945741176605},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.449537068605423},{"id":"https://openalex.org/keywords/document-processing","display_name":"Document processing","score":0.4476010799407959},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.33722057938575745},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3012857437133789},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.2691740393638611},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.14464253187179565},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.1403655707836151}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8448047637939453},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6705572605133057},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.6327583193778992},{"id":"https://openalex.org/C2778310199","wikidata":"https://www.wikidata.org/wiki/Q194070","display_name":"Facsimile","level":3,"score":0.5889207720756531},{"id":"https://openalex.org/C8797682","wikidata":"https://www.wikidata.org/wiki/Q2115","display_name":"XML","level":2,"score":0.5846419930458069},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.5710009932518005},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5152009129524231},{"id":"https://openalex.org/C68699486","wikidata":"https://www.wikidata.org/wiki/Q265904","display_name":"Document Structure Description","level":3,"score":0.4927885830402374},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.4846706688404083},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4681945741176605},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.449537068605423},{"id":"https://openalex.org/C67905146","wikidata":"https://www.wikidata.org/wiki/Q5287646","display_name":"Document processing","level":2,"score":0.4476010799407959},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.33722057938575745},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3012857437133789},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.2691740393638611},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.14464253187179565},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.1403655707836151},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C761482","wikidata":"https://www.wikidata.org/wiki/Q118093","display_name":"Transmission (telecommunications)","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.46298/jdmdh.12689","is_oa":true,"landing_page_url":"https://doi.org/10.46298/jdmdh.12689","pdf_url":"https://jdmdh.episciences.org/14953/pdf","source":{"id":"https://openalex.org/S2736708624","display_name":"Journal of Data Mining & Digital Humanities","issn_l":"2416-5999","issn":["2416-5999"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311340","host_organization_name":"Nicolas Turenne","host_organization_lineage":["https://openalex.org/P4310311340"],"host_organization_lineage_names":["Nicolas Turenne"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Data Mining &amp; Digital Humanities","raw_type":"journal-article"},{"id":"pmh:oai:HAL:hal-04343404v2","is_oa":true,"landing_page_url":"https://hal.science/hal-04343404","pdf_url":null,"source":{"id":"https://openalex.org/S4406922466","display_name":"SPIRE - Sciences Po Institutional REpository","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Journal of Data Mining and Digital Humanities, 2024, &#x27E8;10.46298/jdmdh.12689&#x27E9;","raw_type":"Journal articles"}],"best_oa_location":{"id":"doi:10.46298/jdmdh.12689","is_oa":true,"landing_page_url":"https://doi.org/10.46298/jdmdh.12689","pdf_url":"https://jdmdh.episciences.org/14953/pdf","source":{"id":"https://openalex.org/S2736708624","display_name":"Journal of Data Mining & Digital Humanities","issn_l":"2416-5999","issn":["2416-5999"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310311340","host_organization_name":"Nicolas Turenne","host_organization_lineage":["https://openalex.org/P4310311340"],"host_organization_lineage_names":["Nicolas Turenne"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Data Mining &amp; Digital Humanities","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.8199999928474426,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4389757577.pdf","grobid_xml":"https://content.openalex.org/works/W4389757577.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2161165996","https://openalex.org/W2013036611","https://openalex.org/W2604315690","https://openalex.org/W2361148114","https://openalex.org/W4205266410","https://openalex.org/W2488857217","https://openalex.org/W2768326997","https://openalex.org/W2326439708","https://openalex.org/W2053076471","https://openalex.org/W1971234693"],"abstract_inverted_index":{"Our":[0],"initiative":[1],"aims":[2],"at":[3],"designing":[4],"a":[5,19,25,33,58,98,151,158,161],"controlled":[6],"vocabulary":[7],"for":[8,79,86,104,164],"the":[9,12,42,54,62,67,74,90,95,105,122,125,129,134],"description":[10,56],"of":[11,14,41,53,69,76,97,107,136,160],"layout":[13,55,152],"textual":[15],"sources:":[16],"SegmOnto.":[17],"Following":[18],"more":[20],"physical":[21],"approach":[22],"rather":[23,46],"than":[24,47],"strictly":[26],"semantic":[27],"one,":[28],"it":[29,65,93],"is":[30],"designed":[31],"as":[32,118,120],"pragmatic":[34],"and":[35,72,102,128,155],"generic":[36,162],"typology,":[37],"coping":[38],"with":[39],"most":[40],"Western":[43],"historical":[44],"documents":[45],"answering":[48],"specific":[49],"needs.":[50],"The":[51],"harmonisation":[52],"has":[57],"double":[59],"objective:":[60],"on":[61,89],"one":[63],"hand":[64,92],"facilitates":[66],"mutualisation":[68],"annotated":[70],"data":[71,145],"therefore":[73],"training":[75],"better":[77],"models":[78],"page":[80],"segmentation":[81],"(a":[82],"crucial":[83],"preliminary":[84],"step":[85],"text":[87],"recognition),":[88],"other":[91],"allows":[94],"development":[96],"shared":[99],"post-processing":[100],"workflow":[101],"pipeline":[103,163],"transformation":[106],"ALTO":[108],"or":[109],"PAGE":[110],"files":[111],"into":[112,167],"DH":[113],"standard":[114],"formats,":[115],"which":[116],"preserves":[117],"much":[119],"possible":[121],"link":[123],"between":[124],"extracted":[126],"information":[127],"digital":[130],"facsimile.":[131],"To":[132],"demonstrate":[133],"capacity":[135],"SegmOnto":[137],"to":[138,149],"answer":[139],"both":[140],"these":[141],"objectives,":[142],"we":[143,156],"aggregate":[144],"from":[146],"multiple":[147],"projects":[148],"train":[150],"analysis":[153],"model,":[154],"propose":[157],"prototype":[159],"converting":[165],"ALTO-XMLs":[166],"XML-TEI.":[168]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
