{"id":"https://openalex.org/W2092911974","doi":"https://doi.org/10.1145/1568296.1568315","title":"A survey of types of text noise and techniques to handle noisy text","display_name":"A survey of types of text noise and techniques to handle noisy text","publication_year":2009,"publication_date":"2009-07-23","ids":{"openalex":"https://openalex.org/W2092911974","doi":"https://doi.org/10.1145/1568296.1568315","mag":"2092911974"},"language":"en","primary_location":{"id":"doi:10.1145/1568296.1568315","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1568296.1568315","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of The Third Workshop on Analytics for Noisy Unstructured Text Data","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5110629023","display_name":"L. Venkata Subramaniam","orcid":null},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]},{"id":"https://openalex.org/I4210103279","display_name":"IBM Research - India","ror":"https://ror.org/014wt7r80","country_code":"IN","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210103279","https://openalex.org/I4210114115"]}],"countries":["IN","US"],"is_corresponding":true,"raw_author_name":"L. Venkata Subramaniam","raw_affiliation_strings":["IBM India Research Lab, New Delhi, India","IBM, India Research Lab., New Delhi, India#TAB#"],"affiliations":[{"raw_affiliation_string":"IBM India Research Lab, New Delhi, India","institution_ids":["https://openalex.org/I4210103279"]},{"raw_affiliation_string":"IBM, India Research Lab., New Delhi, India#TAB#","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109095821","display_name":"Shourya Roy","orcid":null},"institutions":[{"id":"https://openalex.org/I33976269","display_name":"Xerox (France)","ror":"https://ror.org/033q0mv79","country_code":"FR","type":"company","lineage":["https://openalex.org/I33976269","https://openalex.org/I4210132870"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Shourya Roy","raw_affiliation_strings":["Xerox India Innovation Hub, Chennai, India","Xerox India Innovation Hub, Chennai, India#TAB#"],"affiliations":[{"raw_affiliation_string":"Xerox India Innovation Hub, Chennai, India","institution_ids":[]},{"raw_affiliation_string":"Xerox India Innovation Hub, Chennai, India#TAB#","institution_ids":["https://openalex.org/I33976269"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004954473","display_name":"Tanveer A. Faruquie","orcid":"https://orcid.org/0009-0008-9474-7928"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]},{"id":"https://openalex.org/I4210103279","display_name":"IBM Research - India","ror":"https://ror.org/014wt7r80","country_code":"IN","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210103279","https://openalex.org/I4210114115"]}],"countries":["IN","US"],"is_corresponding":false,"raw_author_name":"Tanveer A. Faruquie","raw_affiliation_strings":["IBM India Research Lab, New Delhi, India","IBM, India Research Lab., New Delhi, India#TAB#"],"affiliations":[{"raw_affiliation_string":"IBM India Research Lab, New Delhi, India","institution_ids":["https://openalex.org/I4210103279"]},{"raw_affiliation_string":"IBM, India Research Lab., New Delhi, India#TAB#","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5109167303","display_name":"Sumit Negi","orcid":null},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]},{"id":"https://openalex.org/I4210103279","display_name":"IBM Research - India","ror":"https://ror.org/014wt7r80","country_code":"IN","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210103279","https://openalex.org/I4210114115"]}],"countries":["IN","US"],"is_corresponding":false,"raw_author_name":"Sumit Negi","raw_affiliation_strings":["IBM India Research Lab, New Delhi, India","IBM, India Research Lab., New Delhi, India#TAB#"],"affiliations":[{"raw_affiliation_string":"IBM India Research Lab, New Delhi, India","institution_ids":["https://openalex.org/I4210103279"]},{"raw_affiliation_string":"IBM, India Research Lab., New Delhi, India#TAB#","institution_ids":["https://openalex.org/I1341412227"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5110629023"],"corresponding_institution_ids":["https://openalex.org/I1341412227","https://openalex.org/I4210103279"],"apc_list":null,"apc_paid":null,"fwci":9.0714,"has_fulltext":false,"cited_by_count":83,"citation_normalized_percentile":{"value":0.97881335,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"115","last_page":"122"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8217369318008423},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.7077257633209229},{"id":"https://openalex.org/keywords/cover","display_name":"Cover (algebra)","score":0.5772016644477844},{"id":"https://openalex.org/keywords/text-processing","display_name":"Text processing","score":0.5339414477348328},{"id":"https://openalex.org/keywords/optical-character-recognition","display_name":"Optical character recognition","score":0.5274819731712341},{"id":"https://openalex.org/keywords/noisy-text-analytics","display_name":"Noisy text analytics","score":0.5209288001060486},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.4997427463531494},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.46693864464759827},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4480932354927063},{"id":"https://openalex.org/keywords/noisy-data","display_name":"Noisy data","score":0.4294319450855255},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.4213997721672058},{"id":"https://openalex.org/keywords/noise-measurement","display_name":"Noise measurement","score":0.41707855463027954},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.3857221305370331},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3682193160057068},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.35671359300613403},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.34770452976226807},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.22626280784606934},{"id":"https://openalex.org/keywords/text-mining","display_name":"Text mining","score":0.14223268628120422},{"id":"https://openalex.org/keywords/text-graph","display_name":"Text graph","score":0.12468776106834412}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8217369318008423},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.7077257633209229},{"id":"https://openalex.org/C2780428219","wikidata":"https://www.wikidata.org/wiki/Q16952335","display_name":"Cover (algebra)","level":2,"score":0.5772016644477844},{"id":"https://openalex.org/C2779500292","wikidata":"https://www.wikidata.org/wiki/Q14802672","display_name":"Text processing","level":2,"score":0.5339414477348328},{"id":"https://openalex.org/C546480517","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Optical character recognition","level":3,"score":0.5274819731712341},{"id":"https://openalex.org/C151375590","wikidata":"https://www.wikidata.org/wiki/Q17147076","display_name":"Noisy text analytics","level":4,"score":0.5209288001060486},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.4997427463531494},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.46693864464759827},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4480932354927063},{"id":"https://openalex.org/C2781170535","wikidata":"https://www.wikidata.org/wiki/Q30587856","display_name":"Noisy data","level":2,"score":0.4294319450855255},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.4213997721672058},{"id":"https://openalex.org/C29265498","wikidata":"https://www.wikidata.org/wiki/Q7047719","display_name":"Noise measurement","level":3,"score":0.41707855463027954},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3857221305370331},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3682193160057068},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.35671359300613403},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.34770452976226807},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.22626280784606934},{"id":"https://openalex.org/C71472368","wikidata":"https://www.wikidata.org/wiki/Q676880","display_name":"Text mining","level":2,"score":0.14223268628120422},{"id":"https://openalex.org/C66945725","wikidata":"https://www.wikidata.org/wiki/Q18388823","display_name":"Text graph","level":3,"score":0.12468776106834412},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C105580179","wikidata":"https://www.wikidata.org/wiki/Q188928","display_name":"Messenger RNA","level":3,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1568296.1568315","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1568296.1568315","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of The Third Workshop on Analytics for Noisy Unstructured Text Data","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.6299999952316284}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W20984399","https://openalex.org/W24068845","https://openalex.org/W55333121","https://openalex.org/W66690650","https://openalex.org/W184288596","https://openalex.org/W1530635668","https://openalex.org/W1550863320","https://openalex.org/W1560797130","https://openalex.org/W1976978162","https://openalex.org/W1977421239","https://openalex.org/W2002727129","https://openalex.org/W2006969979","https://openalex.org/W2010595692","https://openalex.org/W2011084371","https://openalex.org/W2025641162","https://openalex.org/W2038647974","https://openalex.org/W2053966956","https://openalex.org/W2096961418","https://openalex.org/W2099140003","https://openalex.org/W2101105183","https://openalex.org/W2101200183","https://openalex.org/W2109088983","https://openalex.org/W2113314962","https://openalex.org/W2120535679","https://openalex.org/W2122254380","https://openalex.org/W2123301721","https://openalex.org/W2129271949","https://openalex.org/W2133058591","https://openalex.org/W2133174033","https://openalex.org/W2133512280","https://openalex.org/W2141335422","https://openalex.org/W2153190022","https://openalex.org/W2157023969","https://openalex.org/W2160039544","https://openalex.org/W2600463316","https://openalex.org/W2998215494","https://openalex.org/W4300550506"],"related_works":["https://openalex.org/W2357267845","https://openalex.org/W2770471982","https://openalex.org/W2770474375","https://openalex.org/W2152349655","https://openalex.org/W1568011275","https://openalex.org/W3038648369","https://openalex.org/W3090423411","https://openalex.org/W3017354176","https://openalex.org/W2748790808","https://openalex.org/W2157023969"],"abstract_inverted_index":{"Often,":[0],"in":[1,8,44,77],"the":[2,72],"real":[3],"world":[4],"noise":[5,76],"is":[6],"ubiquitous":[7],"text":[9,42,88],"communications.":[10],"Text":[11],"produced":[12,43],"by":[13],"processing":[14,38],"signals":[15],"intended":[16],"for":[17,23,75,89],"human":[18],"use":[19],"are":[20],"often":[21],"noisy":[22,87],"automated":[24],"computer":[25],"processing.":[26],"Automatic":[27],"speech":[28],"recognition,":[29],"optical":[30],"character":[31],"recognition":[32],"and":[33,58,95],"machine":[34],"translation":[35],"all":[36],"introduce":[37],"noise.":[39,63],"Also":[40],"digital":[41],"informal":[45],"settings":[46],"such":[47],"as":[48],"online":[49],"chat,":[50],"SMS,":[51],"emails,":[52],"message":[53],"boards,":[54],"newsgroups,":[55],"blogs,":[56],"wikis":[57],"web":[59],"pages":[60],"contain":[61],"considerable":[62],"In":[64],"this":[65,86],"paper,":[66],"we":[67],"present":[68],"a":[69],"survey":[70],"of":[71],"existing":[73],"measures":[74],"text.":[78],"We":[79],"also":[80],"cover":[81],"application":[82],"areas":[83],"that":[84],"ingest":[85],"various":[90],"tasks":[91],"like":[92],"Information":[93,96],"Retrieval":[94],"Extraction.":[97]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":9},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":5},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":4},{"year":2015,"cited_by_count":6},{"year":2014,"cited_by_count":8},{"year":2013,"cited_by_count":7},{"year":2012,"cited_by_count":7}],"updated_date":"2026-03-25T14:56:36.534964","created_date":"2025-10-10T00:00:00"}
