{"id":"https://openalex.org/W2154708013","doi":"https://doi.org/10.1145/1568296.1568310","title":"Parallel identification of the spelling variants in corpora","display_name":"Parallel identification of the spelling variants in corpora","publication_year":2009,"publication_date":"2009-07-23","ids":{"openalex":"https://openalex.org/W2154708013","doi":"https://doi.org/10.1145/1568296.1568310","mag":"2154708013"},"language":"en","primary_location":{"id":"doi:10.1145/1568296.1568310","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1568296.1568310","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of The Third Workshop on Analytics for Noisy Unstructured Text Data","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006437012","display_name":"Martin Reynaert","orcid":null},"institutions":[{"id":"https://openalex.org/I193700539","display_name":"Tilburg University","ror":"https://ror.org/04b8v1s79","country_code":"NL","type":"education","lineage":["https://openalex.org/I193700539"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Martin Reynaert","raw_affiliation_strings":["Tilburg University, The Netherlands"],"affiliations":[{"raw_affiliation_string":"Tilburg University, The Netherlands","institution_ids":["https://openalex.org/I193700539"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5006437012"],"corresponding_institution_ids":["https://openalex.org/I193700539"],"apc_list":null,"apc_paid":null,"fwci":0.8724,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.81356816,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"77","last_page":"84"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/spelling","display_name":"Spelling","score":0.754859209060669},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7232575416564941},{"id":"https://openalex.org/keywords/character","display_name":"Character (mathematics)","score":0.6451572179794312},{"id":"https://openalex.org/keywords/confusion","display_name":"Confusion","score":0.6390289068222046},{"id":"https://openalex.org/keywords/string","display_name":"String (physics)","score":0.6341431736946106},{"id":"https://openalex.org/keywords/anagram","display_name":"Anagram","score":0.6113697290420532},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5776500701904297},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.5516218543052673},{"id":"https://openalex.org/keywords/variation","display_name":"Variation (astronomy)","score":0.5275219082832336},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5264297723770142},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5099564790725708},{"id":"https://openalex.org/keywords/hash-function","display_name":"Hash function","score":0.48758357763290405},{"id":"https://openalex.org/keywords/literal","display_name":"Literal (mathematical logic)","score":0.4360356628894806},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.2565896213054657},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1672757863998413},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.13579782843589783},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.1045902669429779}],"concepts":[{"id":"https://openalex.org/C2777801307","wikidata":"https://www.wikidata.org/wiki/Q2088390","display_name":"Spelling","level":2,"score":0.754859209060669},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7232575416564941},{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.6451572179794312},{"id":"https://openalex.org/C2781140086","wikidata":"https://www.wikidata.org/wiki/Q557945","display_name":"Confusion","level":2,"score":0.6390289068222046},{"id":"https://openalex.org/C157486923","wikidata":"https://www.wikidata.org/wiki/Q1376436","display_name":"String (physics)","level":2,"score":0.6341431736946106},{"id":"https://openalex.org/C2777996765","wikidata":"https://www.wikidata.org/wiki/Q122981","display_name":"Anagram","level":3,"score":0.6113697290420532},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5776500701904297},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.5516218543052673},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.5275219082832336},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5264297723770142},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5099564790725708},{"id":"https://openalex.org/C99138194","wikidata":"https://www.wikidata.org/wiki/Q183427","display_name":"Hash function","level":2,"score":0.48758357763290405},{"id":"https://openalex.org/C2780882242","wikidata":"https://www.wikidata.org/wiki/Q14235582","display_name":"Literal (mathematical logic)","level":2,"score":0.4360356628894806},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.2565896213054657},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1672757863998413},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.13579782843589783},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.1045902669429779},{"id":"https://openalex.org/C44870925","wikidata":"https://www.wikidata.org/wiki/Q37547","display_name":"Astrophysics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C11171543","wikidata":"https://www.wikidata.org/wiki/Q41630","display_name":"Psychoanalysis","level":1,"score":0.0},{"id":"https://openalex.org/C37914503","wikidata":"https://www.wikidata.org/wiki/Q156495","display_name":"Mathematical physics","level":1,"score":0.0},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/1568296.1568310","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1568296.1568310","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of The Third Workshop on Analytics for Noisy Unstructured Text Data","raw_type":"proceedings-article"},{"id":"pmh:oai:tilburguniversity.edu:openaire_cris_publications/c46d127a-3ed7-43a1-9a22-d830dfbad5ba","is_oa":false,"landing_page_url":"https://research.tilburguniversity.edu/en/publications/c46d127a-3ed7-43a1-9a22-d830dfbad5ba","pdf_url":null,"source":{"id":"https://openalex.org/S4306401490","display_name":"Research portal (Tilburg University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I193700539","host_organization_name":"Tilburg University","host_organization_lineage":["https://openalex.org/I193700539"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Reynaert, M W C 2009, Parallel identification of the spelling variants in corpora. in D Lopresti, S Roy, K Schulz & L Venkata Subramaniam (eds), Proceedings of the Third workshop on Analytics for Noisy Unstructured Text Data 2009. Unknown Publisher, Barcelona, Spain, pp. 77-84. https://doi.org/10.1145/1568296.1568310","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7300000190734863,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321800","display_name":"Nederlandse Organisatie voor Wetenschappelijk Onderzoek","ror":"https://ror.org/04jsz6e67"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W66690650","https://openalex.org/W1647671624","https://openalex.org/W1699166917","https://openalex.org/W1973235167","https://openalex.org/W2001496424","https://openalex.org/W2066792529","https://openalex.org/W2116533968","https://openalex.org/W2119224635","https://openalex.org/W2144845931","https://openalex.org/W2150102617","https://openalex.org/W2406102681","https://openalex.org/W3041102165","https://openalex.org/W4243992055"],"related_works":["https://openalex.org/W1983724076","https://openalex.org/W2143935516","https://openalex.org/W2020573679","https://openalex.org/W2332886173","https://openalex.org/W1963562434","https://openalex.org/W2126054115","https://openalex.org/W2314395084","https://openalex.org/W2149685959","https://openalex.org/W2884918607","https://openalex.org/W2100947578"],"abstract_inverted_index":{"We":[0,81,146],"present":[1,82],"a":[2,28,56,77,83,91,155],"new":[3],"approach":[4],"based":[5],"on":[6,106,134],"anagram":[7],"hashing":[8],"to":[9,122,160],"globally":[10],"handle":[11],"the":[12,52,66,113,119,124,141],"typographical":[13],"variation":[14,23],"in":[15,27,63,99,102,118],"large":[16],"and":[17,69],"possibly":[18],"noisy":[19],"text":[20,34,47,116,120,144],"collections.":[21],"Typographical":[22],"is":[24,41,132],"typically":[25],"handled":[26],"local":[29],"fashion:":[30],"given":[31,57,90],"one":[32,70,103],"particular":[33,53,78,93,125],"string":[35,54,68],"some":[36],"system":[37],"of":[38,59,71,86,115,154],"retrieving":[39],"near-neighbours":[40,44,74],"applied,":[42],"where":[43],"are":[45],"other":[46],"strings":[48,117],"that":[49],"differ":[50],"from":[51,140],"by":[55],"number":[58],"characters.":[60],"The":[61,128],"difference":[62],"characters":[64],"between":[65],"original":[67],"its":[72,149,161],"retrieved":[73],"we":[75,96],"call":[76],"character":[79,94],"confusion.":[80],"global":[84],"way":[85],"performing":[87],"this":[88],"action:":[89],"possible":[92],"confusion,":[95],"identify":[97],"-":[98,111],"parallel,":[100],"i.e.":[101],"single":[104],"operation":[105],"anagram-hash":[107],"derived":[108],"bit":[109],"vectors":[110],"all":[112],"pairs":[114],"collection":[121],"which":[123],"confusion":[126],"applies.":[127],"algorithm":[129],"proposed":[130],"here":[131],"evaluated":[133],"about":[135],"23,000":[136],"English":[137],"attested":[138],"typos":[139],"Reuters":[142],"rcv1":[143],"collection.":[145],"further":[147],"explore":[148],"usefulness":[150],"for":[151],"unsupervised":[152],"linking":[153],"historical":[156],"Dutch":[157],"word":[158],"list":[159],"contemporary":[162],"counterpart.":[163]},"counts_by_year":[{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":3},{"year":2015,"cited_by_count":1},{"year":2013,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
