{"id":"https://openalex.org/W1887786044","doi":"https://doi.org/10.18653/v1/w15-4303","title":"Toward Tweets Normalization Using Maximum Entropy","display_name":"Toward Tweets Normalization Using Maximum Entropy","publication_year":2015,"publication_date":"2015-01-01","ids":{"openalex":"https://openalex.org/W1887786044","doi":"https://doi.org/10.18653/v1/w15-4303","mag":"1887786044"},"language":"en","primary_location":{"id":"doi:10.18653/v1/w15-4303","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w15-4303","pdf_url":"https://www.aclweb.org/anthology/W15-4303.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Workshop on Noisy User-generated Text","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/W15-4303.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5019244064","display_name":"Mohammad Arshi Saloot","orcid":"https://orcid.org/0000-0003-0819-6530"},"institutions":[{"id":"https://openalex.org/I33849332","display_name":"University of Malaya","ror":"https://ror.org/00rzspn62","country_code":"MY","type":"education","lineage":["https://openalex.org/I33849332"]},{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]}],"countries":["MY","SG"],"is_corresponding":true,"raw_author_name":"Mohammad Arshi Saloot","raw_affiliation_strings":["Department of Information System, University of Malaya, 50603, Malaysia","Department of Artificial Intelligence, University of Malaya, 50603, Malaysia","Institute for Infocomm Research (I2R), A"],"affiliations":[{"raw_affiliation_string":"Department of Information System, University of Malaya, 50603, Malaysia","institution_ids":["https://openalex.org/I33849332"]},{"raw_affiliation_string":"Department of Artificial Intelligence, University of Malaya, 50603, Malaysia","institution_ids":["https://openalex.org/I33849332"]},{"raw_affiliation_string":"Institute for Infocomm Research (I2R), A","institution_ids":["https://openalex.org/I3005327000"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025895244","display_name":"Norisma Idris","orcid":"https://orcid.org/0000-0002-8006-7496"},"institutions":[{"id":"https://openalex.org/I33849332","display_name":"University of Malaya","ror":"https://ror.org/00rzspn62","country_code":"MY","type":"education","lineage":["https://openalex.org/I33849332"]},{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]}],"countries":["MY","SG"],"is_corresponding":false,"raw_author_name":"Norisma Idris","raw_affiliation_strings":["Department of Information System, University of Malaya, 50603, Malaysia","Department of Artificial Intelligence, University of Malaya, 50603, Malaysia","Institute for Infocomm Research (I2R), A"],"affiliations":[{"raw_affiliation_string":"Department of Information System, University of Malaya, 50603, Malaysia","institution_ids":["https://openalex.org/I33849332"]},{"raw_affiliation_string":"Department of Artificial Intelligence, University of Malaya, 50603, Malaysia","institution_ids":["https://openalex.org/I33849332"]},{"raw_affiliation_string":"Institute for Infocomm Research (I2R), A","institution_ids":["https://openalex.org/I3005327000"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010457539","display_name":"Liyana Shuib","orcid":"https://orcid.org/0000-0002-7907-0671"},"institutions":[{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]},{"id":"https://openalex.org/I33849332","display_name":"University of Malaya","ror":"https://ror.org/00rzspn62","country_code":"MY","type":"education","lineage":["https://openalex.org/I33849332"]}],"countries":["MY","SG"],"is_corresponding":false,"raw_author_name":"Liyana Shuib","raw_affiliation_strings":["Department of Artificial Intelligence, University of Malaya, 50603, Malaysia","Institute for Infocomm Research (I2R), A","Department of Information System, University of Malaya, 50603, Malaysia"],"affiliations":[{"raw_affiliation_string":"Department of Artificial Intelligence, University of Malaya, 50603, Malaysia","institution_ids":["https://openalex.org/I33849332"]},{"raw_affiliation_string":"Institute for Infocomm Research (I2R), A","institution_ids":["https://openalex.org/I3005327000"]},{"raw_affiliation_string":"Department of Information System, University of Malaya, 50603, Malaysia","institution_ids":["https://openalex.org/I33849332"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040454046","display_name":"Ram Gopal Raj","orcid":"https://orcid.org/0000-0002-8627-1113"},"institutions":[{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]},{"id":"https://openalex.org/I33849332","display_name":"University of Malaya","ror":"https://ror.org/00rzspn62","country_code":"MY","type":"education","lineage":["https://openalex.org/I33849332"]}],"countries":["MY","SG"],"is_corresponding":false,"raw_author_name":"Ram Gopal Raj","raw_affiliation_strings":["Department of Artificial Intelligence, University of Malaya, 50603, Malaysia","Institute for Infocomm Research (I2R), A","Department of Information System, University of Malaya, 50603, Malaysia"],"affiliations":[{"raw_affiliation_string":"Department of Artificial Intelligence, University of Malaya, 50603, Malaysia","institution_ids":["https://openalex.org/I33849332"]},{"raw_affiliation_string":"Institute for Infocomm Research (I2R), A","institution_ids":["https://openalex.org/I3005327000"]},{"raw_affiliation_string":"Department of Information System, University of Malaya, 50603, Malaysia","institution_ids":["https://openalex.org/I33849332"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5083501418","display_name":"AiTi Aw","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"AiTi Aw","raw_affiliation_strings":["STAR, Singapore"],"affiliations":[{"raw_affiliation_string":"STAR, Singapore","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5019244064"],"corresponding_institution_ids":["https://openalex.org/I3005327000","https://openalex.org/I33849332"],"apc_list":null,"apc_paid":null,"fwci":3.1157,"has_fulltext":true,"cited_by_count":11,"citation_normalized_percentile":{"value":0.92600466,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"19","last_page":"27"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7739665508270264},{"id":"https://openalex.org/keywords/principle-of-maximum-entropy","display_name":"Principle of maximum entropy","score":0.739607036113739},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6029454469680786},{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.5639406442642212},{"id":"https://openalex.org/keywords/microblogging","display_name":"Microblogging","score":0.5515227913856506},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.5503093004226685},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5143169164657593},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.4644365906715393},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.45941978693008423},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.45127999782562256},{"id":"https://openalex.org/keywords/social-media","display_name":"Social media","score":0.4461970627307892},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.37532517313957214},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3260025382041931}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7739665508270264},{"id":"https://openalex.org/C9679016","wikidata":"https://www.wikidata.org/wiki/Q1417473","display_name":"Principle of maximum entropy","level":2,"score":0.739607036113739},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6029454469680786},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.5639406442642212},{"id":"https://openalex.org/C143275388","wikidata":"https://www.wikidata.org/wiki/Q92438","display_name":"Microblogging","level":3,"score":0.5515227913856506},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.5503093004226685},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5143169164657593},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.4644365906715393},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.45941978693008423},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.45127999782562256},{"id":"https://openalex.org/C518677369","wikidata":"https://www.wikidata.org/wiki/Q202833","display_name":"Social media","level":2,"score":0.4461970627307892},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.37532517313957214},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3260025382041931},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/w15-4303","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w15-4303","pdf_url":"https://www.aclweb.org/anthology/W15-4303.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Workshop on Noisy User-generated Text","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/w15-4303","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w15-4303","pdf_url":"https://www.aclweb.org/anthology/W15-4303.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Workshop on Noisy User-generated Text","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.699999988079071,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G8819370986","display_name":null,"funder_award_id":"2014B","funder_id":"https://openalex.org/F4320322604","funder_display_name":"Universiti Malaya"}],"funders":[{"id":"https://openalex.org/F4320322604","display_name":"Universiti Malaya","ror":"https://ror.org/00rzspn62"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W1887786044.pdf","grobid_xml":"https://content.openalex.org/works/W1887786044.grobid-xml"},"referenced_works_count":47,"referenced_works":["https://openalex.org/W63056884","https://openalex.org/W157541337","https://openalex.org/W159246177","https://openalex.org/W1507805082","https://openalex.org/W1508977358","https://openalex.org/W1532325895","https://openalex.org/W1589170661","https://openalex.org/W1631260214","https://openalex.org/W1972099155","https://openalex.org/W1976214863","https://openalex.org/W1997060558","https://openalex.org/W2016871293","https://openalex.org/W2032355794","https://openalex.org/W2033523543","https://openalex.org/W2047333734","https://openalex.org/W2053966956","https://openalex.org/W2059067870","https://openalex.org/W2095743640","https://openalex.org/W2096175520","https://openalex.org/W2096438711","https://openalex.org/W2101105183","https://openalex.org/W2101200183","https://openalex.org/W2109919255","https://openalex.org/W2113540630","https://openalex.org/W2124807415","https://openalex.org/W2126312193","https://openalex.org/W2128529468","https://openalex.org/W2133503566","https://openalex.org/W2144226312","https://openalex.org/W2146867136","https://openalex.org/W2154124206","https://openalex.org/W2156985047","https://openalex.org/W2160637503","https://openalex.org/W2163942301","https://openalex.org/W2164107060","https://openalex.org/W2595715041","https://openalex.org/W2914314925","https://openalex.org/W4243814156","https://openalex.org/W6602579647","https://openalex.org/W6636811518","https://openalex.org/W6644136106","https://openalex.org/W6654466657","https://openalex.org/W6665163735","https://openalex.org/W6679793822","https://openalex.org/W6683927680","https://openalex.org/W6759047975","https://openalex.org/W6979697213"],"related_works":["https://openalex.org/W2275433313","https://openalex.org/W2053241453","https://openalex.org/W2017590198","https://openalex.org/W2978974359","https://openalex.org/W2728430307","https://openalex.org/W2153980712","https://openalex.org/W2047632477","https://openalex.org/W4287644835","https://openalex.org/W3092281475","https://openalex.org/W3098003361"],"abstract_inverted_index":{"The":[0,53,88,166],"use":[1,32],"of":[2,90,107,146],"social":[3],"network":[4],"services":[5],"and":[6,118,138],"microblogs,":[7],"such":[8],"as":[9],"Twitter,":[10],"has":[11],"created":[12],"valuable":[13],"text":[14],"resources,":[15],"which":[16],"contain":[17,23],"extremely":[18],"noisy":[19],"text.":[20],"Twitter":[21],"messages":[22],"so":[24],"much":[25],"noise":[26],"that":[27,58,182],"it":[28],"is":[29],"difficult":[30],"to":[31,73],"them":[33],"in":[34,61,84,152,173],"natural":[35],"language":[36,140],"processing":[37],"tasks.":[38],"This":[39],"paper":[40],"presents":[41],"a":[42,70,99,105,134,139,153],"new":[43],"approach":[44,55,79,167,186],"using":[45,131,175],"the":[46,62,66,77,85,95,143,147,156,160,183],"maximum":[47,67,91,184],"entropy":[48,68,92,185],"model":[49,148,157],"for":[50,110,128,164],"normalizing":[51],"Tweets.":[52,177],"proposed":[54,78],"addresses":[56],"words":[57],"are":[59,126,150],"unseen":[60,82],"training":[63,71,86,154],"phase.":[64],"Although":[65],"needs":[69],"dataset":[72],"adjust":[74],"its":[75],"parameters,":[76],"can":[80,158],"normalize":[81],"data":[83],"set.":[87],"principle":[89],"emphasizes":[93],"incorporating":[94],"available":[96],"features":[97],"into":[98],"uniform":[100],"model.":[101,141],"First,":[102],"we":[103],"generate":[104],"set":[106],"normalized":[108],"candidates":[109],"each":[111,129],"out-ofvocabulary":[112],"word":[113],"based":[114],"on":[115],"lexical,":[116],"phonemic,":[117],"morphophonemic":[119],"similarities.":[120],"Then,":[121],"three":[122],"different":[123],"probability":[124,162],"scores":[125],"calculated":[127],"candidate":[130],"positional":[132],"indexing,":[133],"dependency-based":[135],"frequency":[136],"feature":[137],"After":[142],"optimal":[144],"values":[145],"parameters":[149],"obtained":[151],"phase,":[155],"calculate":[159],"final":[161],"value":[163],"candidates.":[165],"achieved":[168],"an":[169],"83.12":[170],"BLEU":[171],"score":[172],"testing":[174],"2,000":[176],"Our":[178],"experimental":[179],"results":[180],"show":[181],"significantly":[187],"outperforms":[188],"previ-ous":[189],"well-known":[190],"normalization":[191],"approaches.":[192]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
