{"id":"https://openalex.org/W2970231882","doi":"https://doi.org/10.18653/v1/w19-5212","title":"A High-Quality Multilingual Dataset for Structured Documentation Translation","display_name":"A High-Quality Multilingual Dataset for Structured Documentation Translation","publication_year":2019,"publication_date":"2019-01-01","ids":{"openalex":"https://openalex.org/W2970231882","doi":"https://doi.org/10.18653/v1/w19-5212","mag":"2970231882"},"language":"en","primary_location":{"id":"doi:10.18653/v1/w19-5212","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w19-5212","pdf_url":"https://www.aclweb.org/anthology/W19-5212.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Fourth Conference on Machine Translation (Volume 1: Research Papers)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/W19-5212.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5054582135","display_name":"Kazuma Hashimoto","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Kazuma Hashimoto","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082979210","display_name":"Raffaella Buschiazzo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Raffaella Buschiazzo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064976870","display_name":"James Bradbury","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"James Bradbury","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051206907","display_name":"Teresa A. Marshall","orcid":"https://orcid.org/0000-0002-1615-0797"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Teresa Marshall","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059955534","display_name":"Richard Socher","orcid":"https://orcid.org/0000-0002-3577-639X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Richard Socher","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5032046813","display_name":"Caiming Xiong","orcid":"https://orcid.org/0000-0003-0349-8628"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Caiming Xiong","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5054582135"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.4002,"has_fulltext":true,"cited_by_count":13,"citation_normalized_percentile":{"value":0.86398342,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"116","last_page":"127"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9886999726295471,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8707093000411987},{"id":"https://openalex.org/keywords/documentation","display_name":"Documentation","score":0.8440104722976685},{"id":"https://openalex.org/keywords/xml","display_name":"XML","score":0.6771761178970337},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.5736632347106934},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5513901710510254},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5496149063110352},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.5426957011222839},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.5147988200187683},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5143843293190002},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4813852608203888},{"id":"https://openalex.org/keywords/document-structure-description","display_name":"Document Structure Description","score":0.42133161425590515},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.32582026720046997},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.2103116810321808}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8707093000411987},{"id":"https://openalex.org/C56666940","wikidata":"https://www.wikidata.org/wiki/Q788790","display_name":"Documentation","level":2,"score":0.8440104722976685},{"id":"https://openalex.org/C8797682","wikidata":"https://www.wikidata.org/wiki/Q2115","display_name":"XML","level":2,"score":0.6771761178970337},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.5736632347106934},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5513901710510254},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5496149063110352},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.5426957011222839},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.5147988200187683},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5143843293190002},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4813852608203888},{"id":"https://openalex.org/C68699486","wikidata":"https://www.wikidata.org/wiki/Q265904","display_name":"Document Structure Description","level":3,"score":0.42133161425590515},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.32582026720046997},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2103116810321808},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C105580179","wikidata":"https://www.wikidata.org/wiki/Q188928","display_name":"Messenger RNA","level":3,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/w19-5212","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w19-5212","pdf_url":"https://www.aclweb.org/anthology/W19-5212.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Fourth Conference on Machine Translation (Volume 1: Research Papers)","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/w19-5212","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w19-5212","pdf_url":"https://www.aclweb.org/anthology/W19-5212.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Fourth Conference on Machine Translation (Volume 1: Research Papers)","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.49000000953674316}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2970231882.pdf","grobid_xml":"https://content.openalex.org/works/W2970231882.grobid-xml"},"referenced_works_count":32,"referenced_works":["https://openalex.org/W2101105183","https://openalex.org/W2123442489","https://openalex.org/W2124807415","https://openalex.org/W2127863960","https://openalex.org/W2242083635","https://openalex.org/W2413794162","https://openalex.org/W2496235729","https://openalex.org/W2549416390","https://openalex.org/W2550821151","https://openalex.org/W2594047108","https://openalex.org/W2595715041","https://openalex.org/W2606974598","https://openalex.org/W2767019613","https://openalex.org/W2772882909","https://openalex.org/W2780664814","https://openalex.org/W2788330850","https://openalex.org/W2809324505","https://openalex.org/W2891534142","https://openalex.org/W2924210975","https://openalex.org/W2962712961","https://openalex.org/W2962964385","https://openalex.org/W2963062480","https://openalex.org/W2963250244","https://openalex.org/W2963347649","https://openalex.org/W2963403868","https://openalex.org/W2963648186","https://openalex.org/W2963661253","https://openalex.org/W2964120396","https://openalex.org/W2964247056","https://openalex.org/W4293350112","https://openalex.org/W4300428972","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W2618286804","https://openalex.org/W2329643025","https://openalex.org/W3131163342","https://openalex.org/W2166271660","https://openalex.org/W2092256833","https://openalex.org/W2142369114","https://openalex.org/W2002770077","https://openalex.org/W2361728394","https://openalex.org/W2352631095","https://openalex.org/W2883671469"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"a":[3,96,158],"high-quality":[4],"multilingual":[5],"dataset":[6,103],"for":[7,22,37,66,76,171,176],"the":[8,34,105,122,129,142,165],"documentation":[9,36],"domain":[10,57],"to":[11,99,107,119],"advance":[12],"research":[13],"on":[14,147],"localization":[15],"of":[16,24,140,149,162],"structured":[17],"text.":[18],"Unlike":[19],"widely-used":[20],"datasets":[21],"translation":[23,74,112,126,148],"plain":[25],"text,":[26],"we":[27],"collect":[28],"XML-structured":[29],"parallel":[30],"text":[31,62],"segments":[32,63],"from":[33,49,80],"online":[35],"an":[38,88],"enterprise":[39],"software":[40],"platform.":[41],"These":[42],"Web":[43],"pages":[44],"have":[45],"been":[46],"professionally":[47],"translated":[48],"English":[50],"into":[51],"16":[52,111],"languages":[53,79],"and":[54,59,72,87,128,152,168],"maintained":[55],"by":[56,145],"experts,":[58],"around":[60],"100,000":[61],"are":[64],"available":[65],"each":[67],"language":[68],"pair.":[69],"We":[70,92,136,155],"build":[71],"evaluate":[73],"models":[75],"seven":[77],"target":[78],"English,":[81],"with":[82,95,121],"several":[83],"different":[84],"copy":[85,143],"mechanisms":[86,144],"XML-constrained":[89],"beam":[90,130],"search.":[91],"also":[93,137],"experiment":[94],"non-English":[97],"pair":[98],"show":[100,116],"that":[101,117],"our":[102],"has":[104],"potential":[106],"explicitly":[108],"enable":[109],"17":[110],"settings.":[113],"Our":[114],"experiments":[115],"learning":[118],"translate":[120],"XML":[123,134],"tags":[124],"improves":[125],"accuracy,":[127],"search":[131],"accurately":[132],"generates":[133],"structures.":[135],"discuss":[138],"tradeoffs":[139],"using":[141],"focusing":[146],"numerical":[150],"words":[151],"named":[153],"entities.":[154],"further":[156],"provide":[157],"detailed":[159],"human":[160,169],"analysis":[161],"gaps":[163],"between":[164],"model":[166],"output":[167],"translations":[170],"real-world":[172],"applications,":[173],"including":[174],"suitability":[175],"post-editing.":[177]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":5}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
