{"id":"https://openalex.org/W2742102274","doi":"https://doi.org/10.24963/ijcai.2017/271","title":"Exploration of Tree-based Hierarchical Softmax for Recurrent Language Models","display_name":"Exploration of Tree-based Hierarchical Softmax for Recurrent Language Models","publication_year":2017,"publication_date":"2017-07-28","ids":{"openalex":"https://openalex.org/W2742102274","doi":"https://doi.org/10.24963/ijcai.2017/271","mag":"2742102274"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2017/271","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2017/271","pdf_url":"https://www.ijcai.org/proceedings/2017/0271.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Sixth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.ijcai.org/proceedings/2017/0271.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101452820","display_name":"Nan Jiang","orcid":"https://orcid.org/0000-0001-6863-2897"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Nan Jiang","raw_affiliation_strings":["School of Computer Science and Engineering, Beihang University, China","State Key Laboratory of Software Development Environment, Beihang University, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Beihang University, China","institution_ids":["https://openalex.org/I82880672"]},{"raw_affiliation_string":"State Key Laboratory of Software Development Environment, Beihang University, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055420596","display_name":"Wenge Rong","orcid":"https://orcid.org/0000-0002-4229-7215"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenge Rong","raw_affiliation_strings":["School of Computer Science and Engineering, Beihang University, China","State Key Laboratory of Software Development Environment, Beihang University, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Beihang University, China","institution_ids":["https://openalex.org/I82880672"]},{"raw_affiliation_string":"State Key Laboratory of Software Development Environment, Beihang University, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002210013","display_name":"Min Gao","orcid":"https://orcid.org/0000-0003-0127-7477"},"institutions":[{"id":"https://openalex.org/I158842170","display_name":"Chongqing University","ror":"https://ror.org/023rhb549","country_code":"CN","type":"education","lineage":["https://openalex.org/I158842170"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Min Gao","raw_affiliation_strings":["School of Software Engineering, Chongqing University, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Software Engineering, Chongqing University, China","institution_ids":["https://openalex.org/I158842170"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073742611","display_name":"Yikang Shen","orcid":"https://orcid.org/0000-0001-6836-0510"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yikang Shen","raw_affiliation_strings":["Montr\u00e9al Institute for Learning Algorithms, Universt\u00e9 de Montr\u00e9al, Canada","State Key Laboratory of Software Development Environment, Beihang University, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Montr\u00e9al Institute for Learning Algorithms, Universt\u00e9 de Montr\u00e9al, Canada","institution_ids":[]},{"raw_affiliation_string":"State Key Laboratory of Software Development Environment, Beihang University, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100442110","display_name":"Xiong Zhang","orcid":"https://orcid.org/0000-0001-7758-1964"},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhang Xiong","raw_affiliation_strings":["School of Computer Science and Engineering, Beihang University, China","State Key Laboratory of Software Development Environment, Beihang University, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Beihang University, China","institution_ids":["https://openalex.org/I82880672"]},{"raw_affiliation_string":"State Key Laboratory of Software Development Environment, Beihang University, China","institution_ids":["https://openalex.org/I82880672"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101452820"],"corresponding_institution_ids":["https://openalex.org/I82880672"],"apc_list":null,"apc_paid":null,"fwci":0.8324,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.80657488,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1951","last_page":"1957"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/softmax-function","display_name":"Softmax function","score":0.9187344312667847},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8014495968818665},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6876838207244873},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.5826719403266907},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5476587414741516},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.5420703291893005},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.5143662691116333},{"id":"https://openalex.org/keywords/tree","display_name":"Tree (set theory)","score":0.49987030029296875},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.4862586259841919},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.48551368713378906},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.45724841952323914},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4257066547870636},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.34760284423828125},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.07783296704292297}],"concepts":[{"id":"https://openalex.org/C188441871","wikidata":"https://www.wikidata.org/wiki/Q7554146","display_name":"Softmax function","level":3,"score":0.9187344312667847},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8014495968818665},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6876838207244873},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.5826719403266907},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5476587414741516},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.5420703291893005},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.5143662691116333},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.49987030029296875},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.4862586259841919},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.48551368713378906},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.45724841952323914},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4257066547870636},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.34760284423828125},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.07783296704292297},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.24963/ijcai.2017/271","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2017/271","pdf_url":"https://www.ijcai.org/proceedings/2017/0271.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Sixth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.24963/ijcai.2017/271","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2017/271","pdf_url":"https://www.ijcai.org/proceedings/2017/0271.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Sixth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.8100000023841858,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G4970607165","display_name":null,"funder_award_id":"SKLSDE-2017ZX-16","funder_id":"https://openalex.org/F4320326978","funder_display_name":"State Key Laboratory of Software Development Environment"},{"id":"https://openalex.org/G5935354474","display_name":null,"funder_award_id":"61332018","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7475806605","display_name":null,"funder_award_id":"No. 61332018","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320326978","display_name":"State Key Laboratory of Software Development Environment","ror":null},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2742102274.pdf","grobid_xml":"https://content.openalex.org/works/W2742102274.grobid-xml"},"referenced_works_count":41,"referenced_works":["https://openalex.org/W36903255","https://openalex.org/W179875071","https://openalex.org/W1499166245","https://openalex.org/W1573488949","https://openalex.org/W1614298861","https://openalex.org/W1632114991","https://openalex.org/W1924770834","https://openalex.org/W1938755728","https://openalex.org/W1970689298","https://openalex.org/W2069143585","https://openalex.org/W2097732278","https://openalex.org/W2100664567","https://openalex.org/W2100714283","https://openalex.org/W2120861206","https://openalex.org/W2121227244","https://openalex.org/W2131462252","https://openalex.org/W2138204974","https://openalex.org/W2140679639","https://openalex.org/W2152790380","https://openalex.org/W2158049734","https://openalex.org/W2168148636","https://openalex.org/W2175585630","https://openalex.org/W2217098601","https://openalex.org/W2296167893","https://openalex.org/W2525332836","https://openalex.org/W2950075229","https://openalex.org/W2950133940","https://openalex.org/W2950577311","https://openalex.org/W2951559648","https://openalex.org/W2962784628","https://openalex.org/W2962819663","https://openalex.org/W2963084471","https://openalex.org/W2963932686","https://openalex.org/W2998704965","https://openalex.org/W4285719527","https://openalex.org/W4294170691","https://openalex.org/W4298422451","https://openalex.org/W6601546654","https://openalex.org/W6697000176","https://openalex.org/W6790825729","https://openalex.org/W6863994431"],"related_works":["https://openalex.org/W3107204728","https://openalex.org/W4287591324","https://openalex.org/W3108503355","https://openalex.org/W4226420367","https://openalex.org/W2962876041","https://openalex.org/W3090555870","https://openalex.org/W3022820045","https://openalex.org/W2801655600","https://openalex.org/W1773599773","https://openalex.org/W2268150819"],"abstract_inverted_index":{"Recently,":[0],"variants":[1],"of":[2],"neural":[3,15,19,23],"networks":[4],"for":[5],"computational":[6],"linguistics":[7],"have":[8],"been":[9],"proposed":[10],"and":[11,18,47,55,67,77,107],"successfully":[12],"applied":[13],"to":[14,52],"language":[16,97],"modeling":[17],"machine":[20],"translation.":[21],"These":[22],"models":[24],"can":[25],"leverage":[26],"knowledge":[27],"from":[28,41],"massive":[29],"corpora":[30],"but":[31],"they":[32,37],"are":[33],"extremely":[34],"slow":[35],"as":[36],"predict":[38],"candidate":[39],"words":[40],"a":[42,79],"large":[43],"vocabulary":[44],"during":[45],"training":[46],"inference.":[48],"As":[49],"an":[50],"alternative":[51],"gradient":[53],"approximation":[54],"softmax":[56,65],"with":[57,74,86,100],"class":[58],"decomposition,":[59],"we":[60],"explore":[61],"the":[62],"tree-based":[63,81],"hierarchical":[64,89],"method":[66],"reform":[68],"its":[69],"architecture,":[70],"making":[71],"it":[72],"compatible":[73],"modern":[75],"GPUs":[76],"introducing":[78],"compact":[80],"loss":[82],"function.":[83],"When":[84],"combined":[85],"several":[87],"word":[88],"clustering":[90],"algorithms,":[91],"improved":[92],"performance":[93],"is":[94],"achieved":[95],"in":[96],"modelling":[98],"task":[99],"intrinsic":[101],"evaluation":[102],"criterions":[103],"on":[104],"PTB,":[105],"WikiText-2":[106],"WikiText-103":[108],"datasets.":[109]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2018,"cited_by_count":3}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
