{"id":"https://openalex.org/W2250967669","doi":"https://doi.org/10.3115/v1/w14-3901","title":"Foreign Words and the Automatic Processing of Arabic Social Media Text Written in Roman Script","display_name":"Foreign Words and the Automatic Processing of Arabic Social Media Text Written in Roman Script","publication_year":2014,"publication_date":"2014-01-01","ids":{"openalex":"https://openalex.org/W2250967669","doi":"https://doi.org/10.3115/v1/w14-3901","mag":"2250967669"},"language":"en","primary_location":{"id":"doi:10.3115/v1/w14-3901","is_oa":true,"landing_page_url":"https://doi.org/10.3115/v1/w14-3901","pdf_url":"https://aclanthology.org/W14-3901.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the First Workshop on Computational Approaches to Code Switching","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/W14-3901.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5060938475","display_name":"Ramy Eskander","orcid":null},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Ramy Eskander","raw_affiliation_strings":["Center for Computational Learning Systems, Columbia University"],"affiliations":[{"raw_affiliation_string":"Center for Computational Learning Systems, Columbia University","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073237528","display_name":"Mohamed Al-Badrashiny","orcid":null},"institutions":[{"id":"https://openalex.org/I193531525","display_name":"George Washington University","ror":"https://ror.org/00y4zzh67","country_code":"US","type":"education","lineage":["https://openalex.org/I193531525"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mohamed Al-Badrashiny","raw_affiliation_strings":["Department of Computer Science, The George Washington University"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, The George Washington University","institution_ids":["https://openalex.org/I193531525"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084517393","display_name":"Nizar Habash","orcid":"https://orcid.org/0000-0002-1831-3457"},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nizar Habash","raw_affiliation_strings":["Computer Science Department, New York University Abu Dhabi"],"affiliations":[{"raw_affiliation_string":"Computer Science Department, New York University Abu Dhabi","institution_ids":["https://openalex.org/I57206974"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5021314411","display_name":"Owen Rambow","orcid":null},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Owen Rambow","raw_affiliation_strings":["Center for Computational Learning Systems, Columbia University"],"affiliations":[{"raw_affiliation_string":"Center for Computational Learning Systems, Columbia University","institution_ids":["https://openalex.org/I78577930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5060938475"],"corresponding_institution_ids":["https://openalex.org/I78577930"],"apc_list":null,"apc_paid":null,"fwci":5.7261,"has_fulltext":false,"cited_by_count":46,"citation_normalized_percentile":{"value":0.962097,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"12"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9902999997138977,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/punctuation","display_name":"Punctuation","score":0.7294232249259949},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7286562919616699},{"id":"https://openalex.org/keywords/diglossia","display_name":"Diglossia","score":0.6815651655197144},{"id":"https://openalex.org/keywords/alphabet","display_name":"Alphabet","score":0.660801112651825},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.6396969556808472},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5910274982452393},{"id":"https://openalex.org/keywords/arabic","display_name":"Arabic","score":0.5903124809265137},{"id":"https://openalex.org/keywords/arabic-script","display_name":"Arabic script","score":0.5066280961036682},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4850101172924042},{"id":"https://openalex.org/keywords/social-media","display_name":"Social media","score":0.48450157046318054},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4354330599308014},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.1170225441455841},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.10665148496627808}],"concepts":[{"id":"https://openalex.org/C540372491","wikidata":"https://www.wikidata.org/wiki/Q82622","display_name":"Punctuation","level":2,"score":0.7294232249259949},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7286562919616699},{"id":"https://openalex.org/C504331141","wikidata":"https://www.wikidata.org/wiki/Q59203","display_name":"Diglossia","level":3,"score":0.6815651655197144},{"id":"https://openalex.org/C112876837","wikidata":"https://www.wikidata.org/wiki/Q837518","display_name":"Alphabet","level":2,"score":0.660801112651825},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.6396969556808472},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5910274982452393},{"id":"https://openalex.org/C96455323","wikidata":"https://www.wikidata.org/wiki/Q13955","display_name":"Arabic","level":2,"score":0.5903124809265137},{"id":"https://openalex.org/C2777323237","wikidata":"https://www.wikidata.org/wiki/Q1828555","display_name":"Arabic script","level":3,"score":0.5066280961036682},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4850101172924042},{"id":"https://openalex.org/C518677369","wikidata":"https://www.wikidata.org/wiki/Q202833","display_name":"Social media","level":2,"score":0.48450157046318054},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4354330599308014},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.1170225441455841},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.10665148496627808},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3115/v1/w14-3901","is_oa":true,"landing_page_url":"https://doi.org/10.3115/v1/w14-3901","pdf_url":"https://aclanthology.org/W14-3901.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the First Workshop on Computational Approaches to Code Switching","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.673.5421","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.673.5421","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.aclweb.org/anthology/W/W14/W14-3901.pdf","raw_type":"text"}],"best_oa_location":{"id":"doi:10.3115/v1/w14-3901","is_oa":true,"landing_page_url":"https://doi.org/10.3115/v1/w14-3901","pdf_url":"https://aclanthology.org/W14-3901.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the First Workshop on Computational Approaches to Code Switching","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.8100000023841858,"display_name":"Quality Education"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2250967669.pdf","grobid_xml":"https://content.openalex.org/works/W2250967669.grobid-xml"},"referenced_works_count":25,"referenced_works":["https://openalex.org/W131663347","https://openalex.org/W162697342","https://openalex.org/W201141796","https://openalex.org/W408046128","https://openalex.org/W1550053614","https://openalex.org/W1728658630","https://openalex.org/W1966863499","https://openalex.org/W2032355794","https://openalex.org/W2104463314","https://openalex.org/W2109704865","https://openalex.org/W2133990480","https://openalex.org/W2138738738","https://openalex.org/W2153848201","https://openalex.org/W2157765050","https://openalex.org/W2166660646","https://openalex.org/W2183864645","https://openalex.org/W2250212502","https://openalex.org/W2250274454","https://openalex.org/W2250414785","https://openalex.org/W2250816155","https://openalex.org/W2250868350","https://openalex.org/W2251092354","https://openalex.org/W2251530528","https://openalex.org/W2251658995","https://openalex.org/W4233787372"],"related_works":["https://openalex.org/W3207063035","https://openalex.org/W1991557132","https://openalex.org/W2129098172","https://openalex.org/W2291665391","https://openalex.org/W4307308274","https://openalex.org/W597397715","https://openalex.org/W4391654281","https://openalex.org/W3177903323","https://openalex.org/W2890187879","https://openalex.org/W2093682362"],"abstract_inverted_index":{"Arabic":[0,52,79,85],"on":[1,11,96],"social":[2,12],"media":[3,13],"has":[4],"all":[5],"the":[6,29,84],"prop-erties":[7],"of":[8,31,69,93],"any":[9],"language":[10,20],"that":[14],"make":[15],"it":[16],"tough":[17],"for":[18],"natural":[19],"processing,":[21],"plus":[22],"some":[23],"specific":[24],"problems.":[25],"These":[26],"include":[27],"diglossia,":[28],"use":[30],"an":[32,89,97],"alternative":[33],"alphabet":[34],"(Roman),":[35],"and":[36,77,81],"code":[37],"switching":[38],"with":[39],"foreign":[40,65],"languages.":[41],"In":[42],"this":[43],"paper,":[44],"we":[45],"present":[46],"a":[47,64],"system":[48,91],"which":[49],"can":[50],"process":[51],"written":[53],"in":[54],"Roman":[55],"alpha-bet":[56],"(\u201cArabizi\u201d).":[57],"It":[58],"identifies":[59],"whether":[60],"each":[61],"word":[62,66],"is":[63],"or":[67],"one":[68],"an-other":[70],"four":[71],"categories":[72],"(Arabic,":[73],"name,":[74],"punc-tuation,":[75],"sound),":[76],"transliterates":[78],"words":[80],"names":[82],"into":[83],"alphabet.":[86],"We":[87],"obtain":[88],"overall":[90],"performance":[92],"83.8":[94],"%":[95],"unseen":[98],"test":[99],"set.":[100],"1":[101]},"counts_by_year":[{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":12},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":8},{"year":2015,"cited_by_count":2},{"year":2014,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
