{"id":"https://openalex.org/W2915284081","doi":"https://doi.org/10.1109/padsw.2018.8644562","title":"Seal: Efficient Training Large Scale Statistical Machine Translation Models on Spark","display_name":"Seal: Efficient Training Large Scale Statistical Machine Translation Models on Spark","publication_year":2018,"publication_date":"2018-12-01","ids":{"openalex":"https://openalex.org/W2915284081","doi":"https://doi.org/10.1109/padsw.2018.8644562","mag":"2915284081"},"language":"en","primary_location":{"id":"doi:10.1109/padsw.2018.8644562","is_oa":false,"landing_page_url":"https://doi.org/10.1109/padsw.2018.8644562","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE 24th International Conference on Parallel and Distributed Systems (ICPADS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5052175650","display_name":"Rong Gu","orcid":"https://orcid.org/0000-0002-1565-9997"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Rong Gu","raw_affiliation_strings":["National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007386772","display_name":"Min Chen","orcid":"https://orcid.org/0000-0002-9502-7681"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Min Chen","raw_affiliation_strings":["National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009884942","display_name":"Wen\u2010Jia Yang","orcid":"https://orcid.org/0000-0003-4339-2158"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenjia Yang","raw_affiliation_strings":["National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115598059","display_name":"Chunfeng Yuan","orcid":"https://orcid.org/0000-0002-8746-8137"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chunfeng Yuan","raw_affiliation_strings":["National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5007538828","display_name":"Yihua Huang","orcid":"https://orcid.org/0000-0003-1806-0936"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yihua Huang","raw_affiliation_strings":["National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"National Key Laboratory for Novel Software Technology, Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5052175650"],"corresponding_institution_ids":["https://openalex.org/I881766915"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.18217727,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"19","issue":null,"first_page":"118","last_page":"125"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9767000079154968,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8729116916656494},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.7918188571929932},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.6609998941421509},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6600985527038574},{"id":"https://openalex.org/keywords/spark","display_name":"SPARK (programming language)","score":0.5903456807136536},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5520197153091431},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.5127928853034973},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.46349501609802246},{"id":"https://openalex.org/keywords/skew","display_name":"Skew","score":0.45877325534820557},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.44013601541519165},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.4278743267059326},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.4228048324584961},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.40415075421333313},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3882521688938141},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.20304113626480103},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.1567206084728241},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.09035760164260864}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8729116916656494},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.7918188571929932},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.6609998941421509},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6600985527038574},{"id":"https://openalex.org/C2781215313","wikidata":"https://www.wikidata.org/wiki/Q3493345","display_name":"SPARK (programming language)","level":2,"score":0.5903456807136536},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5520197153091431},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.5127928853034973},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.46349501609802246},{"id":"https://openalex.org/C43711488","wikidata":"https://www.wikidata.org/wiki/Q7534783","display_name":"Skew","level":2,"score":0.45877325534820557},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.44013601541519165},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.4278743267059326},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.4228048324584961},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.40415075421333313},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3882521688938141},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.20304113626480103},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.1567206084728241},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.09035760164260864},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C105580179","wikidata":"https://www.wikidata.org/wiki/Q188928","display_name":"Messenger RNA","level":3,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/padsw.2018.8644562","is_oa":false,"landing_page_url":"https://doi.org/10.1109/padsw.2018.8644562","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE 24th International Conference on Parallel and Distributed Systems (ICPADS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W1973923101","https://openalex.org/W2006969979","https://openalex.org/W2047055889","https://openalex.org/W2080373976","https://openalex.org/W2082092506","https://openalex.org/W2097927681","https://openalex.org/W2109664771","https://openalex.org/W2118692229","https://openalex.org/W2124807415","https://openalex.org/W2127797489","https://openalex.org/W2131975293","https://openalex.org/W2134800885","https://openalex.org/W2137398441","https://openalex.org/W2166905217","https://openalex.org/W2173213060","https://openalex.org/W2564708601","https://openalex.org/W2572474373","https://openalex.org/W6652311901","https://openalex.org/W6676373471","https://openalex.org/W6677631519","https://openalex.org/W6679815717","https://openalex.org/W6679855610","https://openalex.org/W6684738949"],"related_works":["https://openalex.org/W4290802965","https://openalex.org/W97789383","https://openalex.org/W1975949872","https://openalex.org/W3159871278","https://openalex.org/W2230552005","https://openalex.org/W4289406402","https://openalex.org/W2905242764","https://openalex.org/W2727156679","https://openalex.org/W3087516072","https://openalex.org/W2067997904"],"abstract_inverted_index":{"Statistical":[0],"machine":[1,67],"translation":[2,28,68,118,169,220,253],"(SMT)":[3],"is":[4,93,165,215],"an":[5,78],"important":[6],"research":[7],"branch":[8],"in":[9,53,136,218],"natural":[10],"language":[11,124,225,256],"processing":[12],"(NLP).":[13],"Similar":[14],"to":[15,204],"many":[16],"other":[17],"NLP":[18],"applications,":[19],"large":[20,49,65],"scale":[21,50,66],"training":[22,39,51,70,86,102,135,176,193,222,238],"data":[23,56,179,207,301],"can":[24,41,185],"potentially":[25],"bring":[26],"higher":[27],"accuracy":[29],"for":[30,245],"SMT":[31,37,84,109,237],"models.":[32],"However,":[33],"the":[34,45,54,60,101,105,113,117,121,130,133,154,158,175,178,183,191,198,206,211,219,224,235,251,261,276,280,288],"traditional":[35],"single-node":[36,289],"model":[38,69,85,134,150,170,221,226,248,254,257],"systems":[40],"hardly":[42],"cope":[43],"with":[44,241,265,284,291],"fast-growing":[46],"amount":[47],"of":[48,63,104,132,143,160],"corpus":[52],"big":[55],"era,":[57],"which":[58,92,214],"makes":[59],"urgent":[61],"requirement":[62],"efficient":[64],"systems.":[71],"In":[72,147,168],"this":[73],"paper,":[74],"we":[75,138],"propose":[76,140],"Seal,":[77,137],"efficient,":[79],"scalable,":[80],"and":[81,120,163,223,255,269,287,302],"end-to-end":[82],"offline":[83],"toolkit":[87],"based":[88],"on":[89,210,272,294],"Apache":[90],"Spark":[91],"a":[94,141],"widely-used":[95],"distributed":[96,282],"data-parallel":[97],"platform.":[98],"Seal":[99,233,259,278,298],"parallelizes":[100],"process":[103],"entire":[106],"three":[107],"key":[108],"models":[110],"that":[111,232],"are":[112],"word":[114,148,246],"alignment":[115,149,247],"model,":[116,119,125],"N":[122],"-Gram":[123],"respectively.":[126,274,296],"To":[127],"further":[128],"improve":[129],"performance":[131],"also":[139,196],"number":[142],"system":[144,239,283,290],"optimization":[145],"methods.":[146],"training,":[151,171,258],"by":[152,172],"optimizing":[153],"block":[155],"size":[156,180],"tuning,":[157],"overhead":[159],"IO":[161],"operation":[162,213],"communication":[164],"greatly":[166],"reduced.":[167],"well":[173],"encoding":[174],"corpus,":[177],"transferred":[181],"over":[182],"network":[184],"be":[186],"reduced":[187],"significantly,":[188],"thus":[189],"improving":[190],"overall":[192],"efficiency.":[194],"We":[195],"optimize":[197],"maximum":[199],"likelihood":[200],"estimation":[201],"(MLE)":[202],"algorithm":[203],"solve":[205],"skew":[208],"issue":[209],"join":[212],"adopted":[216],"both":[217],"training.":[227,249],"The":[228],"experiment":[229],"results":[230],"show":[231],"outperforms":[234,260,279],"well-known":[236],"Chaski":[240],"about":[242,266],"5\u00d7":[243],"speedup":[244,268,271,286,293],"For":[250],"syntactic":[252],"existing":[262,281],"cutting-edge":[263],"tools":[264],"9~18\u00d7":[267],"8~9\u00d7":[270],"average,":[273],"On":[275],"whole,":[277],"4~6\u00d7":[285],"9~60\u00d7":[292],"average":[295],"Besides,":[297],"achieves":[299],"near-linear":[300],"node":[303],"scalability.":[304]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
