{"id":"https://openalex.org/W2900070093","doi":"https://doi.org/10.1109/hpcc/smartcity/dss.2018.00265","title":"A Heuristic Based Pre-processing Methodology for Short Text Similarity Measures in Microblogs","display_name":"A Heuristic Based Pre-processing Methodology for Short Text Similarity Measures in Microblogs","publication_year":2018,"publication_date":"2018-06-01","ids":{"openalex":"https://openalex.org/W2900070093","doi":"https://doi.org/10.1109/hpcc/smartcity/dss.2018.00265","mag":"2900070093"},"language":"en","primary_location":{"id":"doi:10.1109/hpcc/smartcity/dss.2018.00265","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpcc/smartcity/dss.2018.00265","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE 20th International Conference on High Performance Computing and Communications; IEEE 16th International Conference on Smart City; IEEE 4th International Conference on Data Science and Systems (HPCC/SmartCity/DSS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056646507","display_name":"Noufa Alnajran","orcid":null},"institutions":[{"id":"https://openalex.org/I11983389","display_name":"Manchester Metropolitan University","ror":"https://ror.org/02hstj355","country_code":"GB","type":"education","lineage":["https://openalex.org/I11983389"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Noufa Alnajran","raw_affiliation_strings":["School of Computing, Mathematics, and Digital Technology, Manchester Metropolitan University Manchester, Manchester, Uk"],"affiliations":[{"raw_affiliation_string":"School of Computing, Mathematics, and Digital Technology, Manchester Metropolitan University Manchester, Manchester, Uk","institution_ids":["https://openalex.org/I11983389"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084629312","display_name":"Keeley Crockett","orcid":"https://orcid.org/0000-0003-1941-6201"},"institutions":[{"id":"https://openalex.org/I11983389","display_name":"Manchester Metropolitan University","ror":"https://ror.org/02hstj355","country_code":"GB","type":"education","lineage":["https://openalex.org/I11983389"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Keeley Crockett","raw_affiliation_strings":["School of Computing, Mathematics, and Digital Technology, Manchester Metropolitan University Manchester, Manchester, Uk"],"affiliations":[{"raw_affiliation_string":"School of Computing, Mathematics, and Digital Technology, Manchester Metropolitan University Manchester, Manchester, Uk","institution_ids":["https://openalex.org/I11983389"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020271295","display_name":"David McLean","orcid":"https://orcid.org/0000-0001-7894-5176"},"institutions":[{"id":"https://openalex.org/I11983389","display_name":"Manchester Metropolitan University","ror":"https://ror.org/02hstj355","country_code":"GB","type":"education","lineage":["https://openalex.org/I11983389"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"David McLean","raw_affiliation_strings":["School of Computing, Mathematics, and Digital Technology, Manchester Metropolitan University Manchester, Manchester, Uk"],"affiliations":[{"raw_affiliation_string":"School of Computing, Mathematics, and Digital Technology, Manchester Metropolitan University Manchester, Manchester, Uk","institution_ids":["https://openalex.org/I11983389"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065264813","display_name":"Annabel Latham","orcid":"https://orcid.org/0000-0002-8410-7950"},"institutions":[{"id":"https://openalex.org/I11983389","display_name":"Manchester Metropolitan University","ror":"https://ror.org/02hstj355","country_code":"GB","type":"education","lineage":["https://openalex.org/I11983389"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Annabel Latham","raw_affiliation_strings":["School of Computing, Mathematics, and Digital Technology, Manchester Metropolitan University Manchester, Manchester, Uk"],"affiliations":[{"raw_affiliation_string":"School of Computing, Mathematics, and Digital Technology, Manchester Metropolitan University Manchester, Manchester, Uk","institution_ids":["https://openalex.org/I11983389"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5056646507"],"corresponding_institution_ids":["https://openalex.org/I11983389"],"apc_list":null,"apc_paid":null,"fwci":0.8461,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.80987072,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"1","issue":null,"first_page":"1627","last_page":"1633"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/microblogging","display_name":"Microblogging","score":0.9069090485572815},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7400703430175781},{"id":"https://openalex.org/keywords/social-media","display_name":"Social media","score":0.6554355025291443},{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.6312294006347656},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.6062707901000977},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4221195876598358},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.374683678150177},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.25502756237983704}],"concepts":[{"id":"https://openalex.org/C143275388","wikidata":"https://www.wikidata.org/wiki/Q92438","display_name":"Microblogging","level":3,"score":0.9069090485572815},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7400703430175781},{"id":"https://openalex.org/C518677369","wikidata":"https://www.wikidata.org/wiki/Q202833","display_name":"Social media","level":2,"score":0.6554355025291443},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.6312294006347656},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.6062707901000977},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4221195876598358},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.374683678150177},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.25502756237983704},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/hpcc/smartcity/dss.2018.00265","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpcc/smartcity/dss.2018.00265","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE 20th International Conference on High Performance Computing and Communications; IEEE 16th International Conference on Smart City; IEEE 4th International Conference on Data Science and Systems (HPCC/SmartCity/DSS)","raw_type":"proceedings-article"},{"id":"pmh:oai:e-space.mmu.ac.uk:621804","is_oa":false,"landing_page_url":"https://e-space.mmu.ac.uk/view/authors/21ce2b0bfdffc75e5fdcb68d6aabc24f.html>","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"PeerReviewed"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5099999904632568,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W15334911","https://openalex.org/W2102381086","https://openalex.org/W2110503409","https://openalex.org/W2121227244","https://openalex.org/W2124280114","https://openalex.org/W2142384583","https://openalex.org/W2146007434","https://openalex.org/W2156741031","https://openalex.org/W2159849140","https://openalex.org/W2167109146","https://openalex.org/W2199399284","https://openalex.org/W2251345040","https://openalex.org/W2251861449","https://openalex.org/W2387314750","https://openalex.org/W2402651455","https://openalex.org/W2507893932","https://openalex.org/W2512317583","https://openalex.org/W2619234116","https://openalex.org/W2752475888","https://openalex.org/W6681309916","https://openalex.org/W6713497713"],"related_works":["https://openalex.org/W2728430307","https://openalex.org/W2107786128","https://openalex.org/W2053241453","https://openalex.org/W2153980712","https://openalex.org/W2537388533","https://openalex.org/W2036556872","https://openalex.org/W2017590198","https://openalex.org/W2978974359","https://openalex.org/W2021183651","https://openalex.org/W2353191283"],"abstract_inverted_index":{"Short":[0],"text":[1,56],"similarity":[2,63,88,103,146],"measures":[3,104],"have":[4],"lots":[5],"of":[6,42,52,86,102,108,113,131,152,170],"applications":[7],"in":[8,18,30,62,84,105,168],"online":[9],"social":[10],"networks":[11],"(OSN),":[12],"as":[13,67,129,144],"they":[14],"are":[15,118],"being":[16],"integrated":[17],"machine":[19],"learning":[20],"algorithms.":[21],"However,":[22],"the":[23,43,49,53,75,82,87,100,106,114,141,150,166],"data":[24,77],"quality":[25],"is":[26,58,72],"a":[27,59,93,145,156],"major":[28],"challenge":[29],"most":[31],"OSNs,":[32],"particularly":[33],"Twitter.":[34],"The":[35,111],"sparse,":[36],"ambiguous,":[37],"informal,":[38],"and":[39,69,120,172],"unstructured":[40],"nature":[41],"medium":[44],"impose":[45],"difficulties":[46],"to":[47,81,148],"capture":[48],"underlying":[50],"semantics":[51],"text.":[54],"Therefore,":[55],"pre-processing":[57,96,116],"crucial":[60],"phase":[61],"identification":[64],"applications,":[65],"such":[66],"clustering":[68],"classification.":[70],"This":[71,90],"because":[73],"selecting":[74],"appropriate":[76],"processing":[78],"methods":[79],"contributes":[80],"increase":[83],"correlations":[85,171],"measure.":[89],"research":[91],"proposes":[92],"novel":[94],"heuristic-driven":[95],"methodology":[97,117],"for":[98],"enhancing":[99],"performance":[101],"context":[107],"Twitter":[109],"tweets.":[110],"components":[112],"proposed":[115],"discussed":[119],"evaluated":[121],"on":[122],"an":[123],"annotated":[124],"dataset":[125],"that":[126,162],"was":[127,138],"published":[128],"part":[130],"SemEval-2014":[132],"shared":[133],"task.":[134],"An":[135],"experimental":[136],"analysis":[137],"conducted":[139],"using":[140],"cosine":[142],"angle":[143],"measure":[147],"assess":[149],"effect":[151],"our":[153,163],"method":[154],"against":[155],"baseline":[157,167],"(C-Method).":[158],"Experimental":[159],"results":[160],"indicate":[161],"approach":[164],"outperforms":[165],"terms":[169],"error":[173],"rates.":[174]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":2},{"year":2018,"cited_by_count":1}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
