{"id":"https://openalex.org/W7126376405","doi":"https://doi.org/10.18653/v1/2024.findings-eacl.10","title":"A Methodology for Generative Spelling Correction via Natural Spelling Errors Emulation across Multiple Domains and Languages","display_name":"A Methodology for Generative Spelling Correction via Natural Spelling Errors Emulation across Multiple Domains and Languages","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W7126376405","doi":"https://doi.org/10.18653/v1/2024.findings-eacl.10"},"language":null,"primary_location":{"id":"doi:10.18653/v1/2024.findings-eacl.10","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2024.findings-eacl.10","pdf_url":"https://aclanthology.org/2024.findings-eacl.10.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: EACL 2024","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2024.findings-eacl.10.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5071748067","display_name":"Nikita Martynov","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nikita Martynov","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124596187","display_name":"Mark Baushenko","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mark Baushenko","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111120466","display_name":"Anastasia Kozlova","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Anastasia Kozlova","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092671748","display_name":"Katerina Kolomeytseva","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Katerina Kolomeytseva","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124468056","display_name":"Aleksandr Abramov","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Aleksandr Abramov","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5001784131","display_name":"Alena Fenogenova","orcid":"https://orcid.org/0000-0003-3139-1668"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alena Fenogenova","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.57223152,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"138","last_page":"155"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.2533999979496002,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.2533999979496002,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.20880000293254852,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.15729999542236328,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spelling","display_name":"Spelling","score":0.771399974822998},{"id":"https://openalex.org/keywords/emulation","display_name":"Emulation","score":0.7461000084877014},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.5304999947547913},{"id":"https://openalex.org/keywords/natural","display_name":"Natural (archaeology)","score":0.44690001010894775},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.43309998512268066}],"concepts":[{"id":"https://openalex.org/C2777801307","wikidata":"https://www.wikidata.org/wiki/Q2088390","display_name":"Spelling","level":2,"score":0.771399974822998},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7502999901771545},{"id":"https://openalex.org/C149810388","wikidata":"https://www.wikidata.org/wiki/Q5374873","display_name":"Emulation","level":2,"score":0.7461000084877014},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6380000114440918},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6269000172615051},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.5304999947547913},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.44690001010894775},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.43309998512268066},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.305400013923645},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.30239999294281006},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.29580000042915344},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.25859999656677246},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.25130000710487366}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2024.findings-eacl.10","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2024.findings-eacl.10","pdf_url":"https://aclanthology.org/2024.findings-eacl.10.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: EACL 2024","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2024.findings-eacl.10","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2024.findings-eacl.10","pdf_url":"https://aclanthology.org/2024.findings-eacl.10.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: EACL 2024","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.7517560124397278,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7126376405.pdf","grobid_xml":"https://content.openalex.org/works/W7126376405.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1,48],"models":[2,153],"excel":[3],"in":[4,13,18,62,72,87,144],"text":[5,14,89],"generation":[6],"and":[7,22,38,41,60,64,91,116,128,142,147,150,156,174],"generalization,":[8],"however":[9],"they":[10],"face":[11],"challenges":[12],"editing":[15],"tasks,":[16],"especially":[17],"correcting":[19],"spelling":[20,32,58,94,123],"errors":[21,59,68,111],"mistyping.In":[23],"this":[24],"paper,":[25],"we":[26,166],"present":[27],"a":[28,105,113,160],"methodology":[29],"for":[30],"generative":[31,77],"correction":[33],"(SC),":[34],"tested":[35],"on":[36,55],"English":[37],"Russian":[39],"languages":[40],"potentially":[42],"can":[43,69],"be":[44,70],"extended":[45],"to":[46,75],"any":[47],"with":[49],"minor":[50],"changes.Our":[51],"research":[52],"mainly":[53],"focuses":[54],"exploring":[56],"natural":[57],"mistyping":[61],"texts":[63],"studying":[65],"how":[66],"those":[67],"emulated":[71],"correct":[73],"sentences":[74],"enrich":[76],"models'":[78,140],"pre-train":[79],"procedure":[80],"effectively.We":[81],"investigate":[82],"the":[83,120,132,145,152],"effects":[84],"of":[85,110,163],"emulations":[86],"various":[88,137],"domains":[90],"examine":[92],"two":[93],"corruption":[95,138],"techniques:":[96],"1)":[97],"first":[98],"one":[99],"mimics":[100],"human":[101],"behavior":[102],"when":[103],"making":[104],"mistake":[106],"through":[107],"leveraging":[108],"statistics":[109],"from":[112],"particular":[114],"dataset,":[115],"2)":[117],"second":[118],"adds":[119],"most":[121],"common":[122],"errors,":[124],"keyboard":[125],"miss":[126],"clicks,":[127],"some":[129],"heuristics":[130],"within":[131],"texts.We":[133],"conducted":[134],"experiments":[135],"employing":[136],"strategies,":[139],"architectures,":[141],"sizes":[143],"pre-training":[146],"fine-tuning":[148],"stages":[149],"evaluated":[151],"using":[154],"single-domain":[155],"multi-domain":[157],"test":[158],"sets.As":[159],"practical":[161],"outcome":[162],"our":[164],"work,":[165],"introduce":[167],"SAGE":[168],"1":[169],"(Spell":[170],"checking":[171],"via":[172],"Augmentation":[173],"Generative":[175],"distribution":[176],"Emulation).":[177]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-02-02T00:00:00"}
