{"id":"https://openalex.org/W2970822246","doi":"https://doi.org/10.18653/v1/w19-4613","title":"Segmentation for Domain Adaptation in Arabic","display_name":"Segmentation for Domain Adaptation in Arabic","publication_year":2019,"publication_date":"2019-01-01","ids":{"openalex":"https://openalex.org/W2970822246","doi":"https://doi.org/10.18653/v1/w19-4613","mag":"2970822246"},"language":"en","primary_location":{"id":"doi:10.18653/v1/w19-4613","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w19-4613","pdf_url":"https://www.aclweb.org/anthology/W19-4613.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Fourth Arabic Natural Language Processing Workshop","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/W19-4613.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5075586667","display_name":"Mohammed Attia","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Mohammed Attia","raw_affiliation_strings":["Google LLC New York, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC New York, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5091764449","display_name":"Ali Elkahky","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ali Elkahky","raw_affiliation_strings":["Google LLC New York, USA"],"affiliations":[{"raw_affiliation_string":"Google LLC New York, USA","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5075586667"],"corresponding_institution_ids":["https://openalex.org/I1291425158"],"apc_list":null,"apc_paid":null,"fwci":0.14,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.56630758,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"119","last_page":"129"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.989799976348877,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8363052606582642},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.7915285229682922},{"id":"https://openalex.org/keywords/lexicon","display_name":"Lexicon","score":0.7241500020027161},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.7032116055488586},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.659573495388031},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6559593677520752},{"id":"https://openalex.org/keywords/text-segmentation","display_name":"Text segmentation","score":0.5715099573135376},{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.5422500371932983},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.4896095395088196},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.4855441749095917},{"id":"https://openalex.org/keywords/arabic","display_name":"Arabic","score":0.47977501153945923},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.45554015040397644},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.4522259533405304},{"id":"https://openalex.org/keywords/domain-adaptation","display_name":"Domain adaptation","score":0.4394621253013611},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.42899036407470703},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3532024621963501},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.16135084629058838},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.0724743902683258}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8363052606582642},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.7915285229682922},{"id":"https://openalex.org/C2778121359","wikidata":"https://www.wikidata.org/wiki/Q8096","display_name":"Lexicon","level":2,"score":0.7241500020027161},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.7032116055488586},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.659573495388031},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6559593677520752},{"id":"https://openalex.org/C98501671","wikidata":"https://www.wikidata.org/wiki/Q1948408","display_name":"Text segmentation","level":3,"score":0.5715099573135376},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.5422500371932983},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.4896095395088196},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.4855441749095917},{"id":"https://openalex.org/C96455323","wikidata":"https://www.wikidata.org/wiki/Q13955","display_name":"Arabic","level":2,"score":0.47977501153945923},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.45554015040397644},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.4522259533405304},{"id":"https://openalex.org/C2776434776","wikidata":"https://www.wikidata.org/wiki/Q19246213","display_name":"Domain adaptation","level":3,"score":0.4394621253013611},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.42899036407470703},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3532024621963501},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.16135084629058838},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0724743902683258},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/w19-4613","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w19-4613","pdf_url":"https://www.aclweb.org/anthology/W19-4613.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Fourth Arabic Natural Language Processing Workshop","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/w19-4613","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w19-4613","pdf_url":"https://www.aclweb.org/anthology/W19-4613.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Fourth Arabic Natural Language Processing Workshop","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.7200000286102295,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2970822246.pdf","grobid_xml":"https://content.openalex.org/works/W2970822246.grobid-xml"},"referenced_works_count":24,"referenced_works":["https://openalex.org/W81000870","https://openalex.org/W131663347","https://openalex.org/W181643614","https://openalex.org/W331019419","https://openalex.org/W984609824","https://openalex.org/W1497611705","https://openalex.org/W1568793342","https://openalex.org/W1647671624","https://openalex.org/W1970026646","https://openalex.org/W2048978997","https://openalex.org/W2094061585","https://openalex.org/W2100976324","https://openalex.org/W2250562293","https://openalex.org/W2250659129","https://openalex.org/W2250732891","https://openalex.org/W2250854801","https://openalex.org/W2250861254","https://openalex.org/W2330670832","https://openalex.org/W2471147443","https://openalex.org/W2579343286","https://openalex.org/W2614862557","https://openalex.org/W2740130688","https://openalex.org/W2747566258","https://openalex.org/W2795367096"],"related_works":["https://openalex.org/W3011059803","https://openalex.org/W2756978580","https://openalex.org/W2577063019","https://openalex.org/W2972060578","https://openalex.org/W4285877427","https://openalex.org/W783305165","https://openalex.org/W2990352702","https://openalex.org/W2095908250","https://openalex.org/W2531741693","https://openalex.org/W2550455130"],"abstract_inverted_index":{"Segmentation":[0,90],"serves":[1],"as":[2],"an":[3],"integral":[4],"part":[5],"in":[6,83],"many":[7],"NLP":[8],"applications":[9],"including":[10],"Machine":[11],"Translation,":[12],"Parsing,":[13],"and":[14,44,66,76,106,129],"Information":[15],"Retrieval.":[16],"When":[17],"a":[18,61,93,117],"model":[19],"trained":[20],"on":[21],"the":[22,29,41,98,104,111],"standard":[23,42],"language":[24,43],"is":[25,58,146],"applied":[26],"to":[27],"dialects,":[28],"accuracy":[30],"drops":[31],"dramatically.":[32],"However,":[33],"there":[34],"are":[35],"more":[36],"lexical":[37],"items":[38],"shared":[39,56],"by":[40,50,60,86,96,127,136],"dialects":[45,80,135],"than":[46],"can":[47,81],"be":[48],"found":[49],"mere":[51],"surface":[52],"word":[53],"matching.":[54],"This":[55],"lexicon":[57],"obscured":[59],"lot":[62],"of":[63,79,100,110,120],"cliticization,":[64],"gemination,":[65],"character":[67],"repetition.":[68],"In":[69],"this":[70],"paper,":[71],"we":[72],"prove":[73],"that":[74,115],"segmentation":[75,122],"base":[77],"normalization":[78],"help":[82,102],"domain":[84],"adaptation":[85],"reducing":[87,97],"data":[88,124,145],"sparseness.":[89],"will":[91],"improve":[92],"system":[94],"performance":[95],"number":[99],"OOVs,":[101],"isolate":[103],"differences":[105],"allow":[107],"better":[108],"utilization":[109],"commonalities.":[112],"We":[113],"show":[114],"adding":[116],"small":[118],"amount":[119],"dialectal":[121],"training":[123,144],"reduced":[125],"OOVs":[126],"5%":[128],"remarkably":[130],"improves":[131],"POS":[132,143],"tagging":[133],"for":[134],"7.37%":[137],"f-score,":[138],"even":[139],"though":[140],"no":[141],"dialect-specific":[142],"included.":[147]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
