{"id":"https://openalex.org/W2986227781","doi":"https://doi.org/10.26615/978-954-452-056-4_086","title":"Comparing MT Approaches for Text Normalization","display_name":"Comparing MT Approaches for Text Normalization","publication_year":2019,"publication_date":"2019-10-22","ids":{"openalex":"https://openalex.org/W2986227781","doi":"https://doi.org/10.26615/978-954-452-056-4_086","mag":"2986227781"},"language":"en","primary_location":{"id":"doi:10.26615/978-954-452-056-4_086","is_oa":true,"landing_page_url":"https://doi.org/10.26615/978-954-452-056-4_086","pdf_url":"https://doi.org/10.26615/978-954-452-056-4_086","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings - Natural Language Processing in a Deep Learning World","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.26615/978-954-452-056-4_086","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090021889","display_name":"Claudia Matos Veliz","orcid":null},"institutions":[{"id":"https://openalex.org/I32597200","display_name":"Ghent University","ror":"https://ror.org/00cv9y106","country_code":"BE","type":"education","lineage":["https://openalex.org/I32597200"]}],"countries":["BE"],"is_corresponding":true,"raw_author_name":"Claudia Matos Veliz","raw_affiliation_strings":["Language and Translation Technology Team -Ghent Univeristy Groot-Brittannilaan 45, 9000, Ghent, Belgium","Language and Translation Technology Team -Ghent Univeristy Groot-Brittanni\u00eblaan 45, 9000, Ghent, Belgium"],"affiliations":[{"raw_affiliation_string":"Language and Translation Technology Team -Ghent Univeristy Groot-Brittannilaan 45, 9000, Ghent, Belgium","institution_ids":["https://openalex.org/I32597200"]},{"raw_affiliation_string":"Language and Translation Technology Team -Ghent Univeristy Groot-Brittanni\u00eblaan 45, 9000, Ghent, Belgium","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081335221","display_name":"Orph\u00e9e De Clercq","orcid":"https://orcid.org/0000-0002-6090-5552"},"institutions":[{"id":"https://openalex.org/I32597200","display_name":"Ghent University","ror":"https://ror.org/00cv9y106","country_code":"BE","type":"education","lineage":["https://openalex.org/I32597200"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Orph\u00e9e De Clercq","raw_affiliation_strings":["Language and Translation Technology Team -Ghent Univeristy Groot-Brittannilaan 45, 9000, Ghent, Belgium","Language and Translation Technology Team -Ghent Univeristy Groot-Brittanni\u00eblaan 45, 9000, Ghent, Belgium"],"affiliations":[{"raw_affiliation_string":"Language and Translation Technology Team -Ghent Univeristy Groot-Brittannilaan 45, 9000, Ghent, Belgium","institution_ids":["https://openalex.org/I32597200"]},{"raw_affiliation_string":"Language and Translation Technology Team -Ghent Univeristy Groot-Brittanni\u00eblaan 45, 9000, Ghent, Belgium","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5019867041","display_name":"V\u00e9ronique Hoste","orcid":"https://orcid.org/0000-0002-0539-4630"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"V\u00e9ronique Hoste","raw_affiliation_strings":["Language and Translation Technology Team -Ghent Univeristy Groot-Brittanni\u00eblaan 45, 9000, Ghent, Belgium"],"affiliations":[{"raw_affiliation_string":"Language and Translation Technology Team -Ghent Univeristy Groot-Brittanni\u00eblaan 45, 9000, Ghent, Belgium","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5090021889"],"corresponding_institution_ids":["https://openalex.org/I32597200"],"apc_list":null,"apc_paid":null,"fwci":0.8736,"has_fulltext":true,"cited_by_count":8,"citation_normalized_percentile":{"value":0.81616552,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"740","last_page":"749"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.8663097620010376},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8312990665435791},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.7996566295623779},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.7048093676567078},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6843198537826538},{"id":"https://openalex.org/keywords/social-media","display_name":"Social media","score":0.5664774179458618},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.5388683080673218},{"id":"https://openalex.org/keywords/test-data","display_name":"Test data","score":0.46204090118408203},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.43587854504585266},{"id":"https://openalex.org/keywords/parallel-corpora","display_name":"Parallel corpora","score":0.41007137298583984},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.10368940234184265}],"concepts":[{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.8663097620010376},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8312990665435791},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.7996566295623779},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.7048093676567078},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6843198537826538},{"id":"https://openalex.org/C518677369","wikidata":"https://www.wikidata.org/wiki/Q202833","display_name":"Social media","level":2,"score":0.5664774179458618},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.5388683080673218},{"id":"https://openalex.org/C16910744","wikidata":"https://www.wikidata.org/wiki/Q7705759","display_name":"Test data","level":2,"score":0.46204090118408203},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.43587854504585266},{"id":"https://openalex.org/C2985367798","wikidata":"https://www.wikidata.org/wiki/Q1346592","display_name":"Parallel corpora","level":3,"score":0.41007137298583984},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.10368940234184265},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.26615/978-954-452-056-4_086","is_oa":true,"landing_page_url":"https://doi.org/10.26615/978-954-452-056-4_086","pdf_url":"https://doi.org/10.26615/978-954-452-056-4_086","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings - Natural Language Processing in a Deep Learning World","raw_type":"proceedings-article"},{"id":"pmh:oai:archive.ugent.be:8629116","is_oa":true,"landing_page_url":"http://hdl.handle.net/1854/LU-8629116","pdf_url":"https://biblio.ugent.be/publication/8629116/file/01KATKY1B6RSMZZXB61Z3WRJ33.pdf","source":{"id":"https://openalex.org/S4306400478","display_name":"Ghent University Academic Bibliography (Ghent University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I32597200","host_organization_name":"Ghent University","host_organization_lineage":["https://openalex.org/I32597200"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ISBN: 9789544520564","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:archive.ugent.be:8629116","is_oa":false,"landing_page_url":"https://biblio.ugent.be/publication/8629116","pdf_url":null,"source":{"id":"https://openalex.org/S4306400478","display_name":"Ghent University Academic Bibliography (Ghent University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I32597200","host_organization_name":"Ghent University","host_organization_lineage":["https://openalex.org/I32597200"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Proceedings of Recent Advances in Natural Language Processing (RANLP 2019) : natural language processing in a deep learining world","raw_type":"conference"}],"best_oa_location":{"id":"doi:10.26615/978-954-452-056-4_086","is_oa":true,"landing_page_url":"https://doi.org/10.26615/978-954-452-056-4_086","pdf_url":"https://doi.org/10.26615/978-954-452-056-4_086","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings - Natural Language Processing in a Deep Learning World","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.6499999761581421,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2986227781.pdf","grobid_xml":"https://content.openalex.org/works/W2986227781.grobid-xml"},"referenced_works_count":42,"referenced_works":["https://openalex.org/W46912262","https://openalex.org/W630532510","https://openalex.org/W1519942606","https://openalex.org/W1631260214","https://openalex.org/W1902237438","https://openalex.org/W2005241960","https://openalex.org/W2032355794","https://openalex.org/W2053966956","https://openalex.org/W2083730031","https://openalex.org/W2101761627","https://openalex.org/W2106045980","https://openalex.org/W2119595472","https://openalex.org/W2124807415","https://openalex.org/W2130942839","https://openalex.org/W2133564696","https://openalex.org/W2137606428","https://openalex.org/W2146867136","https://openalex.org/W2157331557","https://openalex.org/W2163942301","https://openalex.org/W2164107060","https://openalex.org/W2180008620","https://openalex.org/W2250435758","https://openalex.org/W2251969247","https://openalex.org/W2371227879","https://openalex.org/W2546744831","https://openalex.org/W2737886250","https://openalex.org/W2740691457","https://openalex.org/W2742947407","https://openalex.org/W2773842746","https://openalex.org/W2786387580","https://openalex.org/W2800071984","https://openalex.org/W2803667244","https://openalex.org/W2805275761","https://openalex.org/W2911944143","https://openalex.org/W2914314925","https://openalex.org/W2963212250","https://openalex.org/W2964235839","https://openalex.org/W2964308564","https://openalex.org/W3099138433","https://openalex.org/W4285719527","https://openalex.org/W4295276571","https://openalex.org/W6942214427"],"related_works":["https://openalex.org/W2786253471","https://openalex.org/W3175595715","https://openalex.org/W2972060578","https://openalex.org/W4285877427","https://openalex.org/W783305165","https://openalex.org/W4293584592","https://openalex.org/W2986030184","https://openalex.org/W2104907655","https://openalex.org/W4287212313","https://openalex.org/W3155572818"],"abstract_inverted_index":{"One":[0,34],"of":[1,5,12,97,167],"the":[2,10,71,81,94,103,116,137,151,161,191],"main":[3],"characteristics":[4],"social":[6,31],"media":[7,32],"data":[8,113,185],"is":[9,39,140,148],"use":[11,143],"non-standard":[13],"language.":[14],"Since":[15],"NLP":[16],"tools":[17],"have":[18,109],"been":[19],"trained":[20],"on":[21,133],"traditional":[22],"text":[23,43,50,57,62],"material,":[24],"their":[25],"performance":[26],"drops":[27],"when":[28,131],"applied":[29],"to":[30,36,40,52,135,142,150,153],"data.":[33,192],"way":[35],"overcome":[37],"this":[38,46,168],"first":[41],"perform":[42,136],"normalization.":[44],"In":[45],"work,":[47],"we":[48,92,108,119,158],"apply":[49],"normalization":[51,72],"noisy":[53],"English":[54],"and":[55,67,79,86,174],"Dutch":[56,181],"coming":[58],"from":[59,172],"different":[60],"genres:":[61],"messages,":[63],"message":[64],"board":[65],"posts":[66],"tweets.":[68],"We":[69],"consider":[70],"task":[73],"as":[74],"a":[75,110,144,177],"Machine":[76],"Translation":[77],"problem":[78],"test":[80],"two":[82],"leading":[83],"paradigms:":[84],"statistical":[85],"neural":[87],"machine":[88],"translation.":[89],"For":[90,106],"SMT":[91,134],"explore":[93],"added":[95],"value":[96],"varying":[98],"background":[99,145],"corpora":[100],"for":[101,176],"training":[102,184],"language":[104,179],"model.":[105],"NMT":[107],"look":[111],"at":[112],"augmentation":[114],"since":[115],"parallel":[117],"datasets":[118],"are":[120,123,170],"working":[121],"with":[122],"limited":[124],"in":[125],"size.":[126],"Our":[127],"results":[128],"reveal":[129],"that":[130,147,160,175],"relying":[132],"normalization,":[138],"it":[139],"beneficial":[141],"corpus":[146],"close":[149],"genre":[152],"be":[154],"normalized.":[155],"Regarding":[156],"NMT,":[157],"find":[159],"translations":[162],"-or":[163],"normalizations":[164],"-coming":[165],"out":[166],"model":[169],"far":[171],"perfect":[173],"lowresource":[178],"like":[180],"adding":[182],"additional":[183],"works":[186],"better":[187],"than":[188],"artificially":[189],"augmenting":[190]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":2}],"updated_date":"2026-04-04T08:04:53.788161","created_date":"2019-11-22T00:00:00"}
