{"id":"https://openalex.org/W2012325806","doi":"https://doi.org/10.1145/1568296.1568314","title":"Studying the effects of noisy text on text mining applications","display_name":"Studying the effects of noisy text on text mining applications","publication_year":2009,"publication_date":"2009-07-23","ids":{"openalex":"https://openalex.org/W2012325806","doi":"https://doi.org/10.1145/1568296.1568314","mag":"2012325806"},"language":"en","primary_location":{"id":"doi:10.1145/1568296.1568314","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1568296.1568314","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of The Third Workshop on Analytics for Noisy Unstructured Text Data","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102724695","display_name":"Lipika Dey","orcid":"https://orcid.org/0000-0003-3831-5545"},"institutions":[{"id":"https://openalex.org/I55215948","display_name":"Tata Consultancy Services (India)","ror":"https://ror.org/01b9n8m42","country_code":"IN","type":"company","lineage":["https://openalex.org/I4210086519","https://openalex.org/I55215948"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Lipika Dey","raw_affiliation_strings":["TCS Innovation Lab, Delhi, India"],"affiliations":[{"raw_affiliation_string":"TCS Innovation Lab, Delhi, India","institution_ids":["https://openalex.org/I55215948"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009858204","display_name":"Shamsul Haque","orcid":null},"institutions":[{"id":"https://openalex.org/I55215948","display_name":"Tata Consultancy Services (India)","ror":"https://ror.org/01b9n8m42","country_code":"IN","type":"company","lineage":["https://openalex.org/I4210086519","https://openalex.org/I55215948"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"S. K. Mirajul Haque","raw_affiliation_strings":["TCS Innovation Lab, Delhi, India"],"affiliations":[{"raw_affiliation_string":"TCS Innovation Lab, Delhi, India","institution_ids":["https://openalex.org/I55215948"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5102724695"],"corresponding_institution_ids":["https://openalex.org/I55215948"],"apc_list":null,"apc_paid":null,"fwci":3.04918749,"has_fulltext":false,"cited_by_count":39,"citation_normalized_percentile":{"value":0.93938118,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"107","last_page":"114"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9926999807357788,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.991599977016449,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8332127928733826},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.6495525240898132},{"id":"https://openalex.org/keywords/sentiment-analysis","display_name":"Sentiment analysis","score":0.6322216391563416},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5748567581176758},{"id":"https://openalex.org/keywords/noisy-text-analytics","display_name":"Noisy text analytics","score":0.5563941597938538},{"id":"https://openalex.org/keywords/text-mining","display_name":"Text mining","score":0.522388756275177},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5067723393440247},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4962936043739319},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.4861118793487549},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.44727709889411926},{"id":"https://openalex.org/keywords/biomedical-text-mining","display_name":"Biomedical text mining","score":0.43177205324172974},{"id":"https://openalex.org/keywords/text-graph","display_name":"Text graph","score":0.42882779240608215},{"id":"https://openalex.org/keywords/web-mining","display_name":"Web mining","score":0.4189388155937195},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.41572582721710205},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.28351885080337524},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.19196325540542603}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8332127928733826},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.6495525240898132},{"id":"https://openalex.org/C66402592","wikidata":"https://www.wikidata.org/wiki/Q2271421","display_name":"Sentiment analysis","level":2,"score":0.6322216391563416},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5748567581176758},{"id":"https://openalex.org/C151375590","wikidata":"https://www.wikidata.org/wiki/Q17147076","display_name":"Noisy text analytics","level":4,"score":0.5563941597938538},{"id":"https://openalex.org/C71472368","wikidata":"https://www.wikidata.org/wiki/Q676880","display_name":"Text mining","level":2,"score":0.522388756275177},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5067723393440247},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4962936043739319},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.4861118793487549},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.44727709889411926},{"id":"https://openalex.org/C165141518","wikidata":"https://www.wikidata.org/wiki/Q4915126","display_name":"Biomedical text mining","level":3,"score":0.43177205324172974},{"id":"https://openalex.org/C66945725","wikidata":"https://www.wikidata.org/wiki/Q18388823","display_name":"Text graph","level":3,"score":0.42882779240608215},{"id":"https://openalex.org/C197046077","wikidata":"https://www.wikidata.org/wiki/Q785337","display_name":"Web mining","level":3,"score":0.4189388155937195},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.41572582721710205},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.28351885080337524},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.19196325540542603},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1568296.1568314","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1568296.1568314","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of The Third Workshop on Analytics for Noisy Unstructured Text Data","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7099999785423279,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W47585637","https://openalex.org/W1502364498","https://openalex.org/W1581485226","https://openalex.org/W1989713378","https://openalex.org/W2052935438","https://openalex.org/W2081375810","https://openalex.org/W2103623170","https://openalex.org/W2127973959","https://openalex.org/W2168129207","https://openalex.org/W2169004485","https://openalex.org/W2470343719"],"related_works":["https://openalex.org/W2770471982","https://openalex.org/W2770474375","https://openalex.org/W2152349655","https://openalex.org/W4384067529","https://openalex.org/W2372183225","https://openalex.org/W2389119968","https://openalex.org/W1625494842","https://openalex.org/W2188854577","https://openalex.org/W2365299969","https://openalex.org/W2475935882"],"abstract_inverted_index":{"Text":[0,14],"mining":[1,15,126,130,140,146],"aims":[2],"at":[3],"deriving":[4],"high":[5],"quality":[6],"information":[7,33,63],"from":[8,64],"text":[9,67,125,129,145,154],"in":[10,34,156],"an":[11,35,157],"automated":[12],"way.":[13],"applications":[16],"rely":[17],"on":[18,50,105,135],"Natural":[19],"Language":[20],"Processing":[21],"(NLP)":[22],"tools":[23,43,121],"like":[24],"tagger,":[25],"parser":[26],"etc.":[27,84],"to":[28,48,61,93,110,150],"locate":[29],"and":[30,52,90,122],"retrieve":[31],"relevant":[32],"application":[36,131,147],"specific":[37],"manner.":[38],"Most":[39],"of":[40,74,96,116,118],"these":[41],"NLP":[42,120],"however":[44],"have":[45],"been":[46],"designed":[47],"work":[49],"clean":[51],"grammatically":[53],"correct":[54],"text.":[55],"Presently,":[56],"many":[57],"organizations":[58],"are":[59,87],"interested":[60],"derive":[62],"informally":[65],"written":[66],"that":[68,132,148],"is":[69,136,141],"generated":[70,155],"as":[71],"a":[72],"result":[73],"human":[75],"communication":[76],"through":[77],"emails,":[78],"or":[79],"blog":[80],"posts,":[81],"web-based":[82],"reviews":[83],"These":[85],"texts":[86],"highly":[88],"noisy":[89,153],"often":[91],"found":[92],"contain":[94],"mixture":[95],"languages.":[97],"In":[98],"this":[99],"study":[100],"we":[101,133],"present":[102],"some":[103,117],"analysis":[104],"how":[106],"noise":[107],"introduced":[108],"due":[109],"incorrect":[111],"English":[112],"affects":[113],"the":[114,119,124,142],"performance":[115],"thereafter":[123],"applications.":[127],"The":[128],"focus":[134],"opinion":[137],"mining.":[138],"Opinion":[139],"most":[143],"significant":[144],"has":[149],"deal":[151],"with":[152],"unregulated":[158],"fashion":[159],"by":[160],"users.":[161]},"counts_by_year":[{"year":2023,"cited_by_count":2},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":4},{"year":2015,"cited_by_count":4},{"year":2014,"cited_by_count":7},{"year":2013,"cited_by_count":7},{"year":2012,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
