{"id":"https://openalex.org/W2949303037","doi":"https://doi.org/10.18653/v1/p19-1310","title":"JW300: A Wide-Coverage Parallel Corpus for Low-Resource Languages","display_name":"JW300: A Wide-Coverage Parallel Corpus for Low-Resource Languages","publication_year":2019,"publication_date":"2019-01-01","ids":{"openalex":"https://openalex.org/W2949303037","doi":"https://doi.org/10.18653/v1/p19-1310","mag":"2949303037"},"language":"en","primary_location":{"id":"doi:10.18653/v1/p19-1310","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/p19-1310","pdf_url":"https://www.aclweb.org/anthology/P19-1310.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/P19-1310.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090835657","display_name":"\u017beljko Agi\u0107","orcid":null},"institutions":[{"id":"https://openalex.org/I83467386","display_name":"IT University of Copenhagen","ror":"https://ror.org/02309jg23","country_code":"DK","type":"education","lineage":["https://openalex.org/I83467386"]}],"countries":["DK"],"is_corresponding":false,"raw_author_name":"\u017deljko Agi\u0107","raw_affiliation_strings":["Department of Computer Science IT University of Copenhagen, Denmark"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science IT University of Copenhagen, Denmark","institution_ids":["https://openalex.org/I83467386"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5014866912","display_name":"Ivan Vuli\u0107","orcid":"https://orcid.org/0000-0002-5161-5422"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ivan Vuli\u0107","raw_affiliation_strings":["PolyAI Ltd. London, United Kingdom"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"PolyAI Ltd. London, United Kingdom","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":21.691,"has_fulltext":true,"cited_by_count":202,"citation_normalized_percentile":{"value":0.99521421,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"3204","last_page":"3210"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8662506341934204},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.827805757522583},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5964499115943909},{"id":"https://openalex.org/keywords/economic-shortage","display_name":"Economic shortage","score":0.5905984044075012},{"id":"https://openalex.org/keywords/parallel-corpora","display_name":"Parallel corpora","score":0.5799408555030823},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5709385871887207},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.564123809337616},{"id":"https://openalex.org/keywords/projection","display_name":"Projection (relational algebra)","score":0.5127068758010864},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5054378509521484},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4816540777683258},{"id":"https://openalex.org/keywords/part-of-speech-tagging","display_name":"Part-of-speech tagging","score":0.46441930532455444},{"id":"https://openalex.org/keywords/word-embedding","display_name":"Word embedding","score":0.4567472040653229},{"id":"https://openalex.org/keywords/part-of-speech","display_name":"Part of speech","score":0.2297736406326294},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.1600230634212494},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.12427455186843872},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.08648654818534851}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8662506341934204},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.827805757522583},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5964499115943909},{"id":"https://openalex.org/C194051981","wikidata":"https://www.wikidata.org/wiki/Q1337691","display_name":"Economic shortage","level":3,"score":0.5905984044075012},{"id":"https://openalex.org/C2985367798","wikidata":"https://www.wikidata.org/wiki/Q1346592","display_name":"Parallel corpora","level":3,"score":0.5799408555030823},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5709385871887207},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.564123809337616},{"id":"https://openalex.org/C57493831","wikidata":"https://www.wikidata.org/wiki/Q3134666","display_name":"Projection (relational algebra)","level":2,"score":0.5127068758010864},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5054378509521484},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4816540777683258},{"id":"https://openalex.org/C2780684714","wikidata":"https://www.wikidata.org/wiki/Q1271424","display_name":"Part-of-speech tagging","level":3,"score":0.46441930532455444},{"id":"https://openalex.org/C2777462759","wikidata":"https://www.wikidata.org/wiki/Q18395344","display_name":"Word embedding","level":3,"score":0.4567472040653229},{"id":"https://openalex.org/C123406163","wikidata":"https://www.wikidata.org/wiki/Q82042","display_name":"Part of speech","level":2,"score":0.2297736406326294},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.1600230634212494},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.12427455186843872},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.08648654818534851},{"id":"https://openalex.org/C2778137410","wikidata":"https://www.wikidata.org/wiki/Q2732820","display_name":"Government (linguistics)","level":2,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.18653/v1/p19-1310","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/p19-1310","pdf_url":"https://www.aclweb.org/anthology/P19-1310.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics","raw_type":"proceedings-article"},{"id":"pmh:oai:www.repository.cam.ac.uk:1810/296987","is_oa":true,"landing_page_url":"https://www.repository.cam.ac.uk/handle/1810/296987","pdf_url":null,"source":{"id":"https://openalex.org/S4306401777","display_name":"Apollo (University of Cambridge)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I241749","host_organization_name":"University of Cambridge","host_organization_lineage":["https://openalex.org/I241749"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Conference Object"},{"id":"doi:10.17863/cam.44029","is_oa":true,"landing_page_url":"https://doi.org/10.17863/cam.44029","pdf_url":null,"source":{"id":"https://openalex.org/S7407050737","display_name":"Apollo","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.18653/v1/p19-1310","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/p19-1310","pdf_url":"https://www.aclweb.org/anthology/P19-1310.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.49000000953674316,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320309480","display_name":"Nvidia","ror":"https://ror.org/03jdj4y14"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2949303037.pdf","grobid_xml":"https://content.openalex.org/works/W2949303037.grobid-xml"},"referenced_works_count":44,"referenced_works":["https://openalex.org/W22168010","https://openalex.org/W565549431","https://openalex.org/W630532510","https://openalex.org/W2016630033","https://openalex.org/W2041532239","https://openalex.org/W2115057736","https://openalex.org/W2142523187","https://openalex.org/W2143954309","https://openalex.org/W2143995218","https://openalex.org/W2161044106","https://openalex.org/W2250523604","https://openalex.org/W2251227481","https://openalex.org/W2419539795","https://openalex.org/W2483215953","https://openalex.org/W2493916176","https://openalex.org/W2494423583","https://openalex.org/W2524218611","https://openalex.org/W2538358357","https://openalex.org/W2594021297","https://openalex.org/W2626433756","https://openalex.org/W2807188009","https://openalex.org/W2887436829","https://openalex.org/W2887838996","https://openalex.org/W2888536529","https://openalex.org/W2891896107","https://openalex.org/W2893425640","https://openalex.org/W2901411658","https://openalex.org/W2913897682","https://openalex.org/W2921633540","https://openalex.org/W2942160782","https://openalex.org/W2950018712","https://openalex.org/W2952190837","https://openalex.org/W2962909510","https://openalex.org/W2963047628","https://openalex.org/W2963118869","https://openalex.org/W2963165489","https://openalex.org/W2963472233","https://openalex.org/W2964090065","https://openalex.org/W2964266061","https://openalex.org/W3037697022","https://openalex.org/W3104723404","https://openalex.org/W4285719527","https://openalex.org/W4289282429","https://openalex.org/W4299579390"],"related_works":["https://openalex.org/W1657880117","https://openalex.org/W2595172197","https://openalex.org/W2911655849","https://openalex.org/W4286432911","https://openalex.org/W3134737443","https://openalex.org/W1564027894","https://openalex.org/W103390042","https://openalex.org/W2949303037","https://openalex.org/W2385206281","https://openalex.org/W2358903536"],"abstract_inverted_index":{"Viable":[0],"cross-lingual":[1],"transfer":[2],"critically":[3],"depends":[4],"on":[5,43],"the":[6,50],"availability":[7],"of":[8,12,30],"parallel":[9,28,38],"texts.":[10],"Shortage":[11],"such":[13],"resources":[14],"imposes":[15],"a":[16,27],"development":[17],"and":[18,52,63],"evaluation":[19],"bottleneck":[20],"in":[21,56],"multilingual":[22],"processing.":[23],"We":[24],"introduce":[25],"JW300,":[26],"corpus":[29],"over":[31],"300":[32],"languages":[33],"with":[34,58],"around":[35],"100":[36],"thousand":[37],"sentences":[39],"per":[40],"language":[41],"pair":[42],"average.":[44],"In":[45],"this":[46],"paper,":[47],"we":[48],"present":[49],"resource":[51],"showcase":[53],"its":[54],"utility":[55],"experiments":[57],"crosslingual":[59],"word":[60],"embedding":[61],"induction":[62],"multisource":[64],"part-of-speech":[65],"projection.":[66]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":34},{"year":2022,"cited_by_count":34},{"year":2021,"cited_by_count":62},{"year":2020,"cited_by_count":45},{"year":2019,"cited_by_count":8}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
