{"id":"https://openalex.org/W2032156806","doi":"https://doi.org/10.1109/icdar.2007.4376993","title":"Deriving Symbol Dependent Edit Weights for Text Correction_The Use of Error Dictionaries","display_name":"Deriving Symbol Dependent Edit Weights for Text Correction_The Use of Error Dictionaries","publication_year":2007,"publication_date":"2007-09-01","ids":{"openalex":"https://openalex.org/W2032156806","doi":"https://doi.org/10.1109/icdar.2007.4376993","mag":"2032156806"},"language":"en","primary_location":{"id":"doi:10.1109/icdar.2007.4376993","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icdar.2007.4376993","pdf_url":null,"source":{"id":"https://openalex.org/S4210215987","display_name":"Proceedings of the International Conference on Document Analysis and Recognition","issn_l":"1520-5363","issn":["1520-5363","2379-2140"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Ninth International Conference on Document Analysis and Recognition (ICDAR 2007) Vol 2","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5110059139","display_name":"Ch. Ringlstetter","orcid":null},"institutions":[{"id":"https://openalex.org/I154425047","display_name":"University of Alberta","ror":"https://ror.org/0160cpw27","country_code":"CA","type":"education","lineage":["https://openalex.org/I154425047"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Ch. Ringlstetter","raw_affiliation_strings":["AICML, Department of Computing Science, University of Alberta, Canada","University of Alberta Edmonton"],"affiliations":[{"raw_affiliation_string":"AICML, Department of Computing Science, University of Alberta, Canada","institution_ids":["https://openalex.org/I154425047"]},{"raw_affiliation_string":"University of Alberta Edmonton","institution_ids":["https://openalex.org/I154425047"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013959345","display_name":"Ulrich Reffle","orcid":null},"institutions":[{"id":"https://openalex.org/I4210127848","display_name":"Institut f\u00fcr Urheber- und Medienrecht","ror":"https://ror.org/035pp4s04","country_code":"DE","type":"facility","lineage":["https://openalex.org/I4210127848"]},{"id":"https://openalex.org/I8204097","display_name":"Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen","ror":"https://ror.org/05591te55","country_code":"DE","type":"education","lineage":["https://openalex.org/I8204097"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"U. Reffle","raw_affiliation_strings":["CIS, University of Munich (LMU), Germany","University of Munich"],"affiliations":[{"raw_affiliation_string":"CIS, University of Munich (LMU), Germany","institution_ids":["https://openalex.org/I8204097"]},{"raw_affiliation_string":"University of Munich","institution_ids":["https://openalex.org/I4210127848"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052161697","display_name":"Annette Gotscharek","orcid":null},"institutions":[{"id":"https://openalex.org/I4210127848","display_name":"Institut f\u00fcr Urheber- und Medienrecht","ror":"https://ror.org/035pp4s04","country_code":"DE","type":"facility","lineage":["https://openalex.org/I4210127848"]},{"id":"https://openalex.org/I8204097","display_name":"Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen","ror":"https://ror.org/05591te55","country_code":"DE","type":"education","lineage":["https://openalex.org/I8204097"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"A. Gotscharek","raw_affiliation_strings":["CIS, University of Munich (LMU), Germany","University of Munich"],"affiliations":[{"raw_affiliation_string":"CIS, University of Munich (LMU), Germany","institution_ids":["https://openalex.org/I8204097"]},{"raw_affiliation_string":"University of Munich","institution_ids":["https://openalex.org/I4210127848"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5109699155","display_name":"Klaus U. Schulz","orcid":null},"institutions":[{"id":"https://openalex.org/I4210127848","display_name":"Institut f\u00fcr Urheber- und Medienrecht","ror":"https://ror.org/035pp4s04","country_code":"DE","type":"facility","lineage":["https://openalex.org/I4210127848"]},{"id":"https://openalex.org/I8204097","display_name":"Ludwig-Maximilians-Universit\u00e4t M\u00fcnchen","ror":"https://ror.org/05591te55","country_code":"DE","type":"education","lineage":["https://openalex.org/I8204097"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"K. Schulz","raw_affiliation_strings":["CIS, University of Munich (LMU), Germany","University of Munich"],"affiliations":[{"raw_affiliation_string":"CIS, University of Munich (LMU), Germany","institution_ids":["https://openalex.org/I8204097"]},{"raw_affiliation_string":"University of Munich","institution_ids":["https://openalex.org/I4210127848"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5110059139"],"corresponding_institution_ids":["https://openalex.org/I154425047"],"apc_list":null,"apc_paid":null,"fwci":1.1417,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.76492537,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"3","issue":null,"first_page":"639","last_page":"643"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/edit-distance","display_name":"Edit distance","score":0.8843103647232056},{"id":"https://openalex.org/keywords/levenshtein-distance","display_name":"Levenshtein distance","score":0.7849377393722534},{"id":"https://openalex.org/keywords/ground-truth","display_name":"Ground truth","score":0.7771949768066406},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.751102089881897},{"id":"https://openalex.org/keywords/symbol","display_name":"Symbol (formal)","score":0.6434686779975891},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.5338353514671326},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4795358180999756},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.435404896736145},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.37656357884407043},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3251291513442993},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3244880735874176}],"concepts":[{"id":"https://openalex.org/C44359876","wikidata":"https://www.wikidata.org/wiki/Q5338467","display_name":"Edit distance","level":2,"score":0.8843103647232056},{"id":"https://openalex.org/C2777515626","wikidata":"https://www.wikidata.org/wiki/Q496939","display_name":"Levenshtein distance","level":2,"score":0.7849377393722534},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.7771949768066406},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.751102089881897},{"id":"https://openalex.org/C134400042","wikidata":"https://www.wikidata.org/wiki/Q2372244","display_name":"Symbol (formal)","level":2,"score":0.6434686779975891},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.5338353514671326},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4795358180999756},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.435404896736145},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.37656357884407043},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3251291513442993},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3244880735874176},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icdar.2007.4376993","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icdar.2007.4376993","pdf_url":null,"source":{"id":"https://openalex.org/S4210215987","display_name":"Proceedings of the International Conference on Document Analysis and Recognition","issn_l":"1520-5363","issn":["1520-5363","2379-2140"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Ninth International Conference on Document Analysis and Recognition (ICDAR 2007) Vol 2","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.7599999904632568,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W99399284","https://openalex.org/W1505906253","https://openalex.org/W1647671624","https://openalex.org/W1983410171","https://openalex.org/W2005661126","https://openalex.org/W2018366250","https://openalex.org/W2040102554","https://openalex.org/W2057900969","https://openalex.org/W2071003614","https://openalex.org/W2071663716","https://openalex.org/W2095747918","https://openalex.org/W2099510384","https://openalex.org/W2102443632","https://openalex.org/W2116533968","https://openalex.org/W2131677984","https://openalex.org/W2142268730","https://openalex.org/W2161691863","https://openalex.org/W4250519784","https://openalex.org/W6604065652","https://openalex.org/W6630268475","https://openalex.org/W6636915900"],"related_works":["https://openalex.org/W2461708070","https://openalex.org/W2003932770","https://openalex.org/W1531307672","https://openalex.org/W4362583275","https://openalex.org/W2575897682","https://openalex.org/W4321609555","https://openalex.org/W2844405045","https://openalex.org/W4280559639","https://openalex.org/W4285090010","https://openalex.org/W2788104449"],"abstract_inverted_index":{"Most":[0],"systems":[1,156],"for":[2,96],"correcting":[3],"errors":[4,60],"in":[5,168],"texts":[6],"make":[7],"use":[8],"of":[9,41,56,71,144],"specific":[10],"word":[11],"distance":[12],"measures":[13],"such":[14],"as":[15],"the":[16,38,42,69,145,162],"Levenshtein":[17],"distance.":[18],"In":[19,67,89],"many":[20],"experiments":[21],"it":[22],"has":[23],"been":[24],"shown":[25],"that":[26,35,83,102,161],"correction":[27,166],"accuracy":[28,167],"is":[29,77,136],"improved":[30],"when":[31],"using":[32],"edit":[33,43,85,100],"weights":[34,101],"depend":[36],"on":[37,53,116],"particular":[39],"symbols":[40],"opera-":[44],"tion.":[45],"However,":[46],"most":[47],"proposed":[48],"approaches":[49],"so":[50],"far":[51],"rely":[52],"high":[54],"amounts":[55],"training":[57,109],"data":[58,76,159],"where":[59,125],"and":[61,138,157],"their":[62],"cor-":[63],"rections":[64],"are":[65,87,129],"collected.":[66],"practice,":[68],"preparation":[70],"suit-":[72],"able":[73],"ground":[74,107,117,181],"truth":[75,108,118,182],"often":[78,172],"too":[79],"costly,":[80],"which":[81],"means":[82],"uniform":[84],"costs":[86],"used.":[88],"this":[90],"paper":[91],"we":[92],"evaluate":[93],"ap-":[94],"proaches":[95],"deriving":[97],"symbol":[98],"dependent":[99],"do":[103],"not":[104],"need":[105],"any":[106],"data,":[110],"comparing":[111],"them":[112],"with":[113,153,180],"methods":[114],"based":[115],"training.":[119,183],"We":[120],"suggest":[121],"a":[122,169],"new":[123],"approach":[124],"special":[126],"error":[127],"dictionaries":[128],"used":[130],"to":[131,147,174,177],"estimate":[132],"weights.":[133],"The":[134],"method":[135,163],"simple":[137],"very":[139],"efficient,":[140],"needing":[141],"one":[142],"pass":[143],"document":[146],"be":[148],"corrected.":[149],"Our":[150],"ex-":[151],"periments":[152],"different":[154],"OCR":[155],"textual":[158],"show":[160],"consistently":[164],"improves":[165],"significant":[170],"way,":[171],"leading":[173],"results":[175],"comparable":[176],"those":[178],"achieved":[179]},"counts_by_year":[{"year":2012,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
