{"id":"https://openalex.org/W2128529468","doi":"https://doi.org/10.1093/llc/fqu066","title":"Twitter corpus creation: The case of a Malay Chat-style-text Corpus (MCC)","display_name":"Twitter corpus creation: The case of a Malay Chat-style-text Corpus (MCC)","publication_year":2014,"publication_date":"2014-12-14","ids":{"openalex":"https://openalex.org/W2128529468","doi":"https://doi.org/10.1093/llc/fqu066","mag":"2128529468"},"language":"en","primary_location":{"id":"doi:10.1093/llc/fqu066","is_oa":false,"landing_page_url":"https://doi.org/10.1093/llc/fqu066","pdf_url":null,"source":{"id":"https://openalex.org/S2734814886","display_name":"Digital Scholarship in the Humanities","issn_l":"2055-7671","issn":["2055-7671","2055-768X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Digital Scholarship in the Humanities","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5019244064","display_name":"Mohammad Arshi Saloot","orcid":"https://orcid.org/0000-0003-0819-6530"},"institutions":[{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]},{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]},{"id":"https://openalex.org/I33849332","display_name":"University of Malaya","ror":"https://ror.org/00rzspn62","country_code":"MY","type":"education","lineage":["https://openalex.org/I33849332"]},{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["MY","SG"],"is_corresponding":true,"raw_author_name":"Mohammad Arshi Saloot","raw_affiliation_strings":["University of Malaya, Malaysia and Institute for Infocomm Research (I2R), A*STAR, Singapore"],"affiliations":[{"raw_affiliation_string":"University of Malaya, Malaysia and Institute for Infocomm Research (I2R), A*STAR, Singapore","institution_ids":["https://openalex.org/I3005327000","https://openalex.org/I165932596","https://openalex.org/I115228651","https://openalex.org/I33849332"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025895244","display_name":"Norisma Idris","orcid":"https://orcid.org/0000-0002-8006-7496"},"institutions":[{"id":"https://openalex.org/I33849332","display_name":"University of Malaya","ror":"https://ror.org/00rzspn62","country_code":"MY","type":"education","lineage":["https://openalex.org/I33849332"]}],"countries":["MY"],"is_corresponding":false,"raw_author_name":"Norisma Idris","raw_affiliation_strings":["University of Malaya,Malaysia"],"affiliations":[{"raw_affiliation_string":"University of Malaya,Malaysia","institution_ids":["https://openalex.org/I33849332"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083501418","display_name":"AiTi Aw","orcid":null},"institutions":[{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]},{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"AiTi Aw","raw_affiliation_strings":["Institute for Infocomm Research (I2R), A*STAR, Singapore"],"affiliations":[{"raw_affiliation_string":"Institute for Infocomm Research (I2R), A*STAR, Singapore","institution_ids":["https://openalex.org/I3005327000","https://openalex.org/I115228651"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5003336718","display_name":"Dirk Thorleuchter","orcid":"https://orcid.org/0000-0002-6885-5422"},"institutions":[{"id":"https://openalex.org/I4210119566","display_name":"Fraunhofer Institute for Technological Trend Analysis","ror":"https://ror.org/02sm4kj57","country_code":"DE","type":"facility","lineage":["https://openalex.org/I4210119566","https://openalex.org/I4923324"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Dirk Thorleuchter","raw_affiliation_strings":["Fraunhofer INT, Euskirchen, Appelsgarten 2, Germany"],"affiliations":[{"raw_affiliation_string":"Fraunhofer INT, Euskirchen, Appelsgarten 2, Germany","institution_ids":["https://openalex.org/I4210119566"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5019244064"],"corresponding_institution_ids":["https://openalex.org/I115228651","https://openalex.org/I165932596","https://openalex.org/I3005327000","https://openalex.org/I33849332"],"apc_list":{"value":2522,"currency":"GBP","value_usd":3093},"apc_paid":null,"fwci":2.1902,"has_fulltext":false,"cited_by_count":21,"citation_normalized_percentile":{"value":0.87781318,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"31","issue":"2","first_page":"227","last_page":"243"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13155","display_name":"Digital Communication and Language","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13155","display_name":"Digital Communication and Language","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.9819999933242798,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9797999858856201,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/corpus-linguistics","display_name":"Corpus linguistics","score":0.7333090305328369},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7219589948654175},{"id":"https://openalex.org/keywords/microblogging","display_name":"Microblogging","score":0.6647399067878723},{"id":"https://openalex.org/keywords/style","display_name":"Style (visual arts)","score":0.6594316959381104},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6511676907539368},{"id":"https://openalex.org/keywords/representativeness-heuristic","display_name":"Representativeness heuristic","score":0.6440823674201965},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.608241856098175},{"id":"https://openalex.org/keywords/social-media","display_name":"Social media","score":0.549201250076294},{"id":"https://openalex.org/keywords/chat-room","display_name":"Chat room","score":0.4507763087749481},{"id":"https://openalex.org/keywords/malay","display_name":"Malay","score":0.43692517280578613},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.4196542203426361},{"id":"https://openalex.org/keywords/text-corpus","display_name":"Text corpus","score":0.41140782833099365},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.40631502866744995},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.4030587375164032},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.16727006435394287},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.1347610056400299},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.11865946650505066}],"concepts":[{"id":"https://openalex.org/C532629269","wikidata":"https://www.wikidata.org/wiki/Q865083","display_name":"Corpus linguistics","level":2,"score":0.7333090305328369},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7219589948654175},{"id":"https://openalex.org/C143275388","wikidata":"https://www.wikidata.org/wiki/Q92438","display_name":"Microblogging","level":3,"score":0.6647399067878723},{"id":"https://openalex.org/C2776445246","wikidata":"https://www.wikidata.org/wiki/Q1792644","display_name":"Style (visual arts)","level":2,"score":0.6594316959381104},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6511676907539368},{"id":"https://openalex.org/C37381756","wikidata":"https://www.wikidata.org/wiki/Q20203288","display_name":"Representativeness heuristic","level":2,"score":0.6440823674201965},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.608241856098175},{"id":"https://openalex.org/C518677369","wikidata":"https://www.wikidata.org/wiki/Q202833","display_name":"Social media","level":2,"score":0.549201250076294},{"id":"https://openalex.org/C2777352073","wikidata":"https://www.wikidata.org/wiki/Q574577","display_name":"Chat room","level":3,"score":0.4507763087749481},{"id":"https://openalex.org/C2776938241","wikidata":"https://www.wikidata.org/wiki/Q9237","display_name":"Malay","level":2,"score":0.43692517280578613},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.4196542203426361},{"id":"https://openalex.org/C2474386","wikidata":"https://www.wikidata.org/wiki/Q461183","display_name":"Text corpus","level":2,"score":0.41140782833099365},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.40631502866744995},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.4030587375164032},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.16727006435394287},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.1347610056400299},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.11865946650505066},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1093/llc/fqu066","is_oa":false,"landing_page_url":"https://doi.org/10.1093/llc/fqu066","pdf_url":null,"source":{"id":"https://openalex.org/S2734814886","display_name":"Digital Scholarship in the Humanities","issn_l":"2055-7671","issn":["2055-7671","2055-768X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https://openalex.org/P4310311648","https://openalex.org/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Digital Scholarship in the Humanities","raw_type":"journal-article"},{"id":"pmh:oai:open-archive.highwire.org:digitalsh:31/2/227","is_oa":false,"landing_page_url":"http://dsh.oxfordjournals.org/cgi/content/short/31/2/227","pdf_url":null,"source":{"id":"https://openalex.org/S4406923041","display_name":"HighWire Press Open Archive","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"TEXT"},{"id":"pmh:oai:publica.fraunhofer.de:publica/238236","is_oa":false,"landing_page_url":"https://publica.fraunhofer.de/handle/publica/238236","pdf_url":null,"source":{"id":"https://openalex.org/S4306400318","display_name":"Fraunhofer-Publica (Fraunhofer-Gesellschaft)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4923324","host_organization_name":"Fraunhofer-Gesellschaft","host_organization_lineage":["https://openalex.org/I4923324"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"journal article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.7900000214576721,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":54,"referenced_works":["https://openalex.org/W40549020","https://openalex.org/W49973572","https://openalex.org/W117057031","https://openalex.org/W156229664","https://openalex.org/W574522305","https://openalex.org/W1520100043","https://openalex.org/W1532083779","https://openalex.org/W1556151859","https://openalex.org/W1574901103","https://openalex.org/W1800296434","https://openalex.org/W1820505434","https://openalex.org/W1999608294","https://openalex.org/W2014516359","https://openalex.org/W2044688197","https://openalex.org/W2044769572","https://openalex.org/W2046804949","https://openalex.org/W2047295649","https://openalex.org/W2047449974","https://openalex.org/W2048828870","https://openalex.org/W2048975423","https://openalex.org/W2059067870","https://openalex.org/W2066682493","https://openalex.org/W2068571273","https://openalex.org/W2074952134","https://openalex.org/W2093842354","https://openalex.org/W2104232304","https://openalex.org/W2132697058","https://openalex.org/W2136151826","https://openalex.org/W2137049373","https://openalex.org/W2137870083","https://openalex.org/W2146867136","https://openalex.org/W2167102709","https://openalex.org/W2169200297","https://openalex.org/W2244626821","https://openalex.org/W2270833710","https://openalex.org/W2319041464","https://openalex.org/W2490430595","https://openalex.org/W2599630175","https://openalex.org/W2740874734","https://openalex.org/W2801840425","https://openalex.org/W2803437449","https://openalex.org/W2914314925","https://openalex.org/W2916086000","https://openalex.org/W3099138433","https://openalex.org/W3145042860","https://openalex.org/W4213287682","https://openalex.org/W4234262038","https://openalex.org/W4285719527","https://openalex.org/W6679690964","https://openalex.org/W6694040638","https://openalex.org/W6722942020","https://openalex.org/W6735299659","https://openalex.org/W6741886535","https://openalex.org/W6784997385"],"related_works":["https://openalex.org/W193977043","https://openalex.org/W2375537499","https://openalex.org/W2516977220","https://openalex.org/W1982878818","https://openalex.org/W3096124370","https://openalex.org/W2794113965","https://openalex.org/W2384280299","https://openalex.org/W2366526038","https://openalex.org/W2183648197","https://openalex.org/W4225941136"],"abstract_inverted_index":{"In":[0,74],"recent":[1],"years,":[2],"social":[3],"networks,":[4],"microblogs,":[5],"and":[6,15,57,90,136,146,174,182,197,204,220],"short":[7],"message":[8,150,185],"service":[9],"have":[10],"deeply":[11],"penetrated":[12],"peoples":[13],"lives,":[14],"thus,":[16],"chat-style":[17,24,39,84,93],"text":[18,25,94],"is":[19,96,192],"a":[20,38,45,71,83,111],"common":[21],"phenomenon.":[22],"This":[23,202],"has":[26,124],"many":[27],"unknown":[28],"features":[29],"for":[30,69,81,212],"linguists,":[31],"which":[32,123],"can":[33],"be":[34],"discovered":[35],"by":[36,194],"analyzing":[37],"corpus.":[40,73],"The":[41,121],"process":[42,108,206],"of":[43,109,130,148,158,161,166,184,189,207],"constructing":[44,110],"corpus":[46,50,67,79,85,95,113,168,191,203,208],"conforms":[47],"to":[48,60,76,105],"specific":[49,66],"criteria,":[51],"such":[52,138],"as":[53,115,139],"representativeness,":[54],"sampling,":[55],"variety,":[56],"chronology.":[58],"Up":[59],"now,":[61],"literature":[62],"does":[63],"not":[64],"provide":[65],"criteria":[68,80,102],"creating":[70,82,209],"chat-style-text":[72],"contrast":[75],"related":[77],"work,":[78],"are":[86,103,210],"provided.":[87],"An":[88],"exhaustive":[89],"reliable":[91],"Malay":[92,117],"still":[97],"lacking.":[98],"Thus,":[99],"the":[100,107,116,156,159,162,167,170,190,205],"provided":[101],"used":[104,142],"demonstrate":[106],"Twitter":[112,149,179],"known":[114],"Chat-style":[118],"Corpus":[119],"(MCC).":[120],"MCC,":[122],"1":[125],"million":[126],"twitter":[127,143],"messages,":[128],"consists":[129],"14,484,384":[131],"word":[132],"instances,":[133],"646,807":[134],"terms":[135,173],"metadata,":[137],"posting":[140],"time,":[141],"client":[144],"application,":[145],"type":[147],"(simple":[151],"Tweet,":[152],"Retweet,":[153],"Reply).":[154],"Furthermore,":[155],"results":[157],"analysis":[160],"MCC":[163],"reveal":[164],"characteristics":[165],"including":[169],"most":[171],"frequent":[172],"collocations,":[175],"Zipf":[176],"law":[177],"diagram,":[178],"peak":[180],"hours,":[181],"percentages":[183],"types.":[186],"Finally,":[187],"representativeness":[188],"evaluated":[193],"employing":[195],"cartography":[196],"automatic":[198],"language":[199,218],"identification":[200],"methods.":[201],"valuable":[211],"researchers":[213],"working":[214],"in":[215],"linguistics,":[216],"natural":[217],"processing,":[219],"data":[221],"mining.":[222]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":4},{"year":2017,"cited_by_count":4},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2016-06-24T00:00:00"}
