{"id":"https://openalex.org/W2989417980","doi":"https://doi.org/10.18653/v1/k19-1016","title":"Improving Natural Language Understanding by Reverse Mapping Bytepair Encoding","display_name":"Improving Natural Language Understanding by Reverse Mapping Bytepair Encoding","publication_year":2019,"publication_date":"2019-01-01","ids":{"openalex":"https://openalex.org/W2989417980","doi":"https://doi.org/10.18653/v1/k19-1016","mag":"2989417980"},"language":"en","primary_location":{"id":"doi:10.18653/v1/k19-1016","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/k19-1016","pdf_url":"https://www.aclweb.org/anthology/K19-1016.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 23rd Conference on Computational Natural Language Learning (CoNLL)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/K19-1016.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5012247942","display_name":"Chaodong Tong","orcid":"https://orcid.org/0000-0001-8767-5833"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210156404","display_name":"Institute of Information Engineering","ror":"https://ror.org/04r53se39","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210156404"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chaodong Tong","raw_affiliation_strings":["Institute of Information Engineering, Chinese Academy of Sciences, Beijing, China","School of Cyber Security, University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Information Engineering, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210156404","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Cyber Security, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035322416","display_name":"Huailiang Peng","orcid":"https://orcid.org/0009-0000-4943-8853"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I4210156404","display_name":"Institute of Information Engineering","ror":"https://ror.org/04r53se39","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210156404"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huailiang Peng","raw_affiliation_strings":["Institute of Information Engineering, Chinese Academy of Sciences, Beijing, China","School of Cyber Security, University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Information Engineering, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210156404","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Cyber Security, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102024077","display_name":"Qiong Dai","orcid":"https://orcid.org/0000-0002-9094-1933"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I4210156404","display_name":"Institute of Information Engineering","ror":"https://ror.org/04r53se39","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210156404"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qiong Dai","raw_affiliation_strings":["Institute of Information Engineering, Chinese Academy of Sciences, Beijing, China","School of Cyber Security, University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Information Engineering, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210156404","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Cyber Security, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103098320","display_name":"Lei Jiang","orcid":"https://orcid.org/0000-0003-4795-0284"},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]},{"id":"https://openalex.org/I4210156404","display_name":"Institute of Information Engineering","ror":"https://ror.org/04r53se39","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210156404"]},{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Lei Jiang","raw_affiliation_strings":["Institute of Information Engineering, Chinese Academy of Sciences, Beijing, China","School of Cyber Security, University of Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute of Information Engineering, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210156404","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Cyber Security, University of Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5070744644","display_name":"Jianghua Huang","orcid":"https://orcid.org/0000-0003-0437-1303"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jianghua Huang","raw_affiliation_strings":["Meituan-Dianping Group, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Meituan-Dianping Group, Beijing, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5103098320"],"corresponding_institution_ids":["https://openalex.org/I19820366","https://openalex.org/I4210156404","https://openalex.org/I4210165038"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.14522395,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"163","last_page":"173"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7741323113441467},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.7728601694107056},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.698972761631012},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6238534450531006},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5085436105728149},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5085235834121704},{"id":"https://openalex.org/keywords/word-embedding","display_name":"Word embedding","score":0.46147486567497253},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.45582661032676697},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.43595826625823975},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.4351412057876587},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.424416184425354},{"id":"https://openalex.org/keywords/named-entity-recognition","display_name":"Named-entity recognition","score":0.42217960953712463},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07489421963691711}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7741323113441467},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.7728601694107056},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.698972761631012},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6238534450531006},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5085436105728149},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5085235834121704},{"id":"https://openalex.org/C2777462759","wikidata":"https://www.wikidata.org/wiki/Q18395344","display_name":"Word embedding","level":3,"score":0.46147486567497253},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.45582661032676697},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.43595826625823975},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.4351412057876587},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.424416184425354},{"id":"https://openalex.org/C2779135771","wikidata":"https://www.wikidata.org/wiki/Q403574","display_name":"Named-entity recognition","level":3,"score":0.42217960953712463},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07489421963691711},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/k19-1016","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/k19-1016","pdf_url":"https://www.aclweb.org/anthology/K19-1016.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 23rd Conference on Computational Natural Language Learning (CoNLL)","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/k19-1016","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/k19-1016","pdf_url":"https://www.aclweb.org/anthology/K19-1016.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 23rd Conference on Computational Natural Language Learning (CoNLL)","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.800000011920929,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G3667409424","display_name":null,"funder_award_id":"2016YFB0801302","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G466649759","display_name":null,"funder_award_id":"2017Y","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G614122021","display_name":null,"funder_award_id":"2017YFB0803003","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G8114646031","display_name":null,"funder_award_id":"2016Y","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G8340078520","display_name":null,"funder_award_id":"2017YF","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G8823009050","display_name":null,"funder_award_id":"2017YFB","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2989417980.pdf","grobid_xml":"https://content.openalex.org/works/W2989417980.grobid-xml"},"referenced_works_count":35,"referenced_works":["https://openalex.org/W1491611863","https://openalex.org/W1614298861","https://openalex.org/W2125031621","https://openalex.org/W2130158090","https://openalex.org/W2140679639","https://openalex.org/W2153579005","https://openalex.org/W2164019165","https://openalex.org/W2250418535","https://openalex.org/W2250539671","https://openalex.org/W2251771443","https://openalex.org/W2251939518","https://openalex.org/W2394700483","https://openalex.org/W2396767181","https://openalex.org/W2493916176","https://openalex.org/W2692059227","https://openalex.org/W2759366113","https://openalex.org/W2788496822","https://openalex.org/W2799046069","https://openalex.org/W2896457183","https://openalex.org/W2950577311","https://openalex.org/W2953356739","https://openalex.org/W2962739339","https://openalex.org/W2962784628","https://openalex.org/W2962951088","https://openalex.org/W2963045354","https://openalex.org/W2963341956","https://openalex.org/W2963403868","https://openalex.org/W2963626623","https://openalex.org/W2963876447","https://openalex.org/W2963929190","https://openalex.org/W2963979492","https://openalex.org/W4234388646","https://openalex.org/W4285719527","https://openalex.org/W4294170691","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W3199871245","https://openalex.org/W2915436880","https://openalex.org/W1964783010","https://openalex.org/W3198729192","https://openalex.org/W3121292129","https://openalex.org/W4295038225","https://openalex.org/W4285819037","https://openalex.org/W2800535933","https://openalex.org/W4387517132","https://openalex.org/W2786460063"],"abstract_inverted_index":{"Recently,":[0],"language":[1,5,13,45],"models":[2,7],"(LMs)":[3],"or":[4,35],"representation":[6],"are":[8,20],"widely":[9],"used":[10],"in":[11,63,155],"natural":[12],"understanding":[14],"(NLU)":[15],"tasks.":[16],"However,":[17],"these":[18],"LMs":[19],"usually":[21],"trained":[22],"on":[23,57,136,163,167,170,174],"large":[24],"unlabeled":[25],"text":[26],"corpora,":[27],"while":[28],"the":[29,42,51,88,100,115,121,124,145,152],"finetuning":[30],"process":[31,132],"simply":[32],"takes":[33],"words":[34,60],"wordpieces":[36],"as":[37,114],"model":[38,46,111],"input.":[39],"Because":[40],"of":[41,53,55,91,123,147],"differences":[43],"between":[44],"and":[47,79,141,172],"NLU":[48],"task":[49],"objectives,":[50],"problem":[52],"lack":[54],"concern":[56],"some":[58],"key":[59],"exists.":[61],"Thus":[62],"this":[64,97],"paper,":[65],"we":[66],"propose":[67,108],"a":[68,109,130],"method":[69,98],"called":[70],"reverse":[71],"mapping":[72],"bytepair":[73,92],"encoding,":[74],"which":[75],"maps":[76],"named-entity":[77],"information":[78,127],"other":[80],"word-level":[81],"linguistic":[82],"features":[83],"back":[84],"to":[85,99,119],"subwords":[86],"during":[87],"encoding":[89,93],"procedure":[90],"(BPE).":[94],"We":[95,106],"employ":[96],"Generative":[101],"Pre-trained":[102],"Transformer":[103],"(OpenAI":[104],"GPT)":[105],"also":[107],"new":[110],"architecture":[112],"named":[113],"multi-channel":[116],"separate":[117],"transformer":[118],"evaluate":[120],"effectiveness":[122,146],"newly":[125],"introduced":[126],"by":[128],"employing":[129],"training":[131],"without":[133],"parameter-sharing.":[134],"Experiments":[135],"Story":[137],"Cloze,":[138,165],"RTE,":[139,168],"SciTail":[140,171],"SST-2":[142],"datasets":[143],"demonstrate":[144],"our":[148,157],"approach.":[149],"Compared":[150],"with":[151],"original":[153],"results":[154],"GPT,":[156],"approach":[158],"gains":[159],"1.58%":[160],"absolute":[161],"increase":[162],"Stories":[164],"6.4%":[166],"0.69%":[169],"0.8%":[173],"SST-2.":[175]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
