{"id":"https://openalex.org/W4385469860","doi":"https://doi.org/10.1145/3604951.3605512","title":"DocLangID: Improving Few-Shot Training to Identify the Language of Historical Documents","display_name":"DocLangID: Improving Few-Shot Training to Identify the Language of Historical Documents","publication_year":2023,"publication_date":"2023-08-01","ids":{"openalex":"https://openalex.org/W4385469860","doi":"https://doi.org/10.1145/3604951.3605512"},"language":"en","primary_location":{"id":"doi:10.1145/3604951.3605512","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3604951.3605512","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 7th International Workshop on Historical Document Imaging and Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5092578944","display_name":"Furkan Simsek","orcid":"https://orcid.org/0009-0001-8007-1014"},"institutions":[{"id":"https://openalex.org/I143288331","display_name":"Hasso Plattner Institute","ror":"https://ror.org/058rn5r42","country_code":"DE","type":"facility","lineage":["https://openalex.org/I143288331","https://openalex.org/I176453806"]},{"id":"https://openalex.org/I176453806","display_name":"University of Potsdam","ror":"https://ror.org/03bnmw459","country_code":"DE","type":"education","lineage":["https://openalex.org/I176453806"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Furkan Simsek","raw_affiliation_strings":["Hasso Plattner Institute, University of Potsdam, Germany"],"raw_orcid":"https://orcid.org/0009-0001-8007-1014","affiliations":[{"raw_affiliation_string":"Hasso Plattner Institute, University of Potsdam, Germany","institution_ids":["https://openalex.org/I143288331","https://openalex.org/I176453806"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069656048","display_name":"B. Pfitzmann","orcid":"https://orcid.org/0009-0003-9460-6467"},"institutions":[{"id":"https://openalex.org/I143288331","display_name":"Hasso Plattner Institute","ror":"https://ror.org/058rn5r42","country_code":"DE","type":"facility","lineage":["https://openalex.org/I143288331","https://openalex.org/I176453806"]},{"id":"https://openalex.org/I176453806","display_name":"University of Potsdam","ror":"https://ror.org/03bnmw459","country_code":"DE","type":"education","lineage":["https://openalex.org/I176453806"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Brian Pfitzmann","raw_affiliation_strings":["Hasso Plattner Institute, University of Potsdam, Germany"],"raw_orcid":"https://orcid.org/0009-0003-9460-6467","affiliations":[{"raw_affiliation_string":"Hasso Plattner Institute, University of Potsdam, Germany","institution_ids":["https://openalex.org/I143288331","https://openalex.org/I176453806"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068885760","display_name":"Hendrik Raetz","orcid":"https://orcid.org/0000-0002-1230-1236"},"institutions":[{"id":"https://openalex.org/I143288331","display_name":"Hasso Plattner Institute","ror":"https://ror.org/058rn5r42","country_code":"DE","type":"facility","lineage":["https://openalex.org/I143288331","https://openalex.org/I176453806"]},{"id":"https://openalex.org/I176453806","display_name":"University of Potsdam","ror":"https://ror.org/03bnmw459","country_code":"DE","type":"education","lineage":["https://openalex.org/I176453806"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Hendrik Raetz","raw_affiliation_strings":["Hasso Plattner Institute, University of Potsdam, Germany"],"raw_orcid":"https://orcid.org/0000-0002-1230-1236","affiliations":[{"raw_affiliation_string":"Hasso Plattner Institute, University of Potsdam, Germany","institution_ids":["https://openalex.org/I143288331","https://openalex.org/I176453806"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053166615","display_name":"Jona Otholt","orcid":"https://orcid.org/0009-0005-1071-1230"},"institutions":[{"id":"https://openalex.org/I143288331","display_name":"Hasso Plattner Institute","ror":"https://ror.org/058rn5r42","country_code":"DE","type":"facility","lineage":["https://openalex.org/I143288331","https://openalex.org/I176453806"]},{"id":"https://openalex.org/I176453806","display_name":"University of Potsdam","ror":"https://ror.org/03bnmw459","country_code":"DE","type":"education","lineage":["https://openalex.org/I176453806"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Jona Otholt","raw_affiliation_strings":["Hasso Plattner Institute, University of Potsdam, Germany"],"raw_orcid":"https://orcid.org/0009-0005-1071-1230","affiliations":[{"raw_affiliation_string":"Hasso Plattner Institute, University of Potsdam, Germany","institution_ids":["https://openalex.org/I143288331","https://openalex.org/I176453806"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003983595","display_name":"Haojin Yang","orcid":"https://orcid.org/0000-0002-8733-5772"},"institutions":[{"id":"https://openalex.org/I143288331","display_name":"Hasso Plattner Institute","ror":"https://ror.org/058rn5r42","country_code":"DE","type":"facility","lineage":["https://openalex.org/I143288331","https://openalex.org/I176453806"]},{"id":"https://openalex.org/I176453806","display_name":"University of Potsdam","ror":"https://ror.org/03bnmw459","country_code":"DE","type":"education","lineage":["https://openalex.org/I176453806"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Haojin Yang","raw_affiliation_strings":["Hasso Plattner Institute, University of Potsdam, Germany"],"raw_orcid":"https://orcid.org/0000-0002-8733-5772","affiliations":[{"raw_affiliation_string":"Hasso Plattner Institute, University of Potsdam, Germany","institution_ids":["https://openalex.org/I143288331","https://openalex.org/I176453806"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102919398","display_name":"Christoph Meinel","orcid":"https://orcid.org/0000-0002-3410-3193"},"institutions":[{"id":"https://openalex.org/I143288331","display_name":"Hasso Plattner Institute","ror":"https://ror.org/058rn5r42","country_code":"DE","type":"facility","lineage":["https://openalex.org/I143288331","https://openalex.org/I176453806"]},{"id":"https://openalex.org/I176453806","display_name":"University of Potsdam","ror":"https://ror.org/03bnmw459","country_code":"DE","type":"education","lineage":["https://openalex.org/I176453806"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Christoph Meinel","raw_affiliation_strings":["Hasso Plattner Institute, University of Potsdam, Germany"],"raw_orcid":"https://orcid.org/0000-0002-3410-3193","affiliations":[{"raw_affiliation_string":"Hasso Plattner Institute, University of Potsdam, Germany","institution_ids":["https://openalex.org/I143288331","https://openalex.org/I176453806"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5092578944"],"corresponding_institution_ids":["https://openalex.org/I143288331","https://openalex.org/I176453806"],"apc_list":null,"apc_paid":null,"fwci":0.2355,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.51094986,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"103","last_page":"108"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9923999905586243,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9923999905586243,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9908999800682068,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12357","display_name":"Digital Media Forensic Detection","score":0.9488999843597412,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8711327314376831},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7032014727592468},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.6736017465591431},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.6653723120689392},{"id":"https://openalex.org/keywords/adaptability","display_name":"Adaptability","score":0.6177076697349548},{"id":"https://openalex.org/keywords/extractor","display_name":"Extractor","score":0.5767461657524109},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5541234016418457},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.550547182559967},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.5112948417663574},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.48562565445899963},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.46583881974220276},{"id":"https://openalex.org/keywords/labeled-data","display_name":"Labeled data","score":0.41200676560401917},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.35479170083999634}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8711327314376831},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7032014727592468},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.6736017465591431},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.6653723120689392},{"id":"https://openalex.org/C177606310","wikidata":"https://www.wikidata.org/wiki/Q5674297","display_name":"Adaptability","level":2,"score":0.6177076697349548},{"id":"https://openalex.org/C117978034","wikidata":"https://www.wikidata.org/wiki/Q5422192","display_name":"Extractor","level":2,"score":0.5767461657524109},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5541234016418457},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.550547182559967},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.5112948417663574},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.48562565445899963},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.46583881974220276},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.41200676560401917},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35479170083999634},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C21880701","wikidata":"https://www.wikidata.org/wiki/Q2144042","display_name":"Process engineering","level":1,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3604951.3605512","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3604951.3605512","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 7th International Workshop on Historical Document Imaging and Processing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.8299999833106995,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W2001642682","https://openalex.org/W2053050503","https://openalex.org/W2194775991","https://openalex.org/W2796346823","https://openalex.org/W2798836702","https://openalex.org/W2875814315","https://openalex.org/W2902561591","https://openalex.org/W2913219290","https://openalex.org/W2943258252","https://openalex.org/W2963626623","https://openalex.org/W3087359459","https://openalex.org/W4214872252","https://openalex.org/W4312115774"],"related_works":["https://openalex.org/W4295705264","https://openalex.org/W3193920202","https://openalex.org/W4318813552","https://openalex.org/W2576964996","https://openalex.org/W2130553454","https://openalex.org/W4317548404","https://openalex.org/W3022007134","https://openalex.org/W2087783760","https://openalex.org/W2033364610","https://openalex.org/W2797776314"],"abstract_inverted_index":{"In":[0],"this":[1,20],"work,":[2],"we":[3,36],"propose":[4],"DocLangID,":[5],"a":[6,27,38,45,74,90],"transfer":[7],"learning":[8,41],"approach":[9,42,131],"to":[10,43,49],"identify":[11],"the":[12,53,65,97,105,119,133,142,147],"language":[13,134],"of":[14,32,52,60,67,83,100,146],"unlabeled":[15,54,68,98,148],"historical":[16,33,84,125],"documents.":[17,34,85],"We":[18,86,109],"achieve":[19],"by":[21,102],"first":[22],"leveraging":[23],"labeled":[24,62],"data":[25,81],"from":[26,64],"different":[28,80],"but":[29],"related":[30],"domain":[31],"Secondly,":[35],"implement":[37],"distance-based":[39],"few-shot":[40,107],"adapt":[44],"convolutional":[46],"neural":[47],"network":[48],"new":[50,78],"languages":[51,115,145],"dataset.":[55,149],"By":[56],"introducing":[57],"small":[58],"amounts":[59],"manually":[61],"examples":[63],"set":[66,99],"images,":[69],"our":[70,111,129],"feature":[71],"extractor":[72],"develops":[73],"better":[75],"adaptability":[76],"towards":[77],"and":[79],"distributions":[82],"show":[87],"that":[88,116,128],"such":[89],"model":[91],"can":[92],"be":[93],"effectively":[94],"fine-tuned":[95],"for":[96],"images":[101],"only":[103],"reusing":[104],"same":[106],"examples.":[108],"showcase":[110],"work":[112],"across":[113],"10":[114],"mostly":[117],"use":[118],"Latin":[120],"script.":[121],"Our":[122],"experiments":[123],"on":[124,141],"documents":[126],"demonstrate":[127],"combined":[130],"improves":[132],"identification":[135],"performance,":[136],"achieving":[137],"74%":[138],"recognition":[139],"accuracy":[140],"four":[143],"unseen":[144]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2023-08-02T00:00:00"}
