{"id":"https://openalex.org/W3192314779","doi":"https://doi.org/10.1109/icc42927.2021.9500450","title":"Enhancing Transformer with Horizontal and Vertical Guiding Mechanisms for Neural Language Modeling","display_name":"Enhancing Transformer with Horizontal and Vertical Guiding Mechanisms for Neural Language Modeling","publication_year":2021,"publication_date":"2021-06-01","ids":{"openalex":"https://openalex.org/W3192314779","doi":"https://doi.org/10.1109/icc42927.2021.9500450","mag":"3192314779"},"language":"en","primary_location":{"id":"doi:10.1109/icc42927.2021.9500450","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icc42927.2021.9500450","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICC 2021 - IEEE International Conference on Communications","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018090877","display_name":"Anlin Qu","orcid":null},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Anlin Qu","raw_affiliation_strings":["State Key Laboratory of Virtual Reality Technology and Systems, and Beijing Advanced Innovation Center for Big Data and Brain Computing, School of Computer Science and Engineering, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Virtual Reality Technology and Systems, and Beijing Advanced Innovation Center for Big Data and Brain Computing, School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053316728","display_name":"Jianwei Niu","orcid":"https://orcid.org/0000-0003-3946-5107"},"institutions":[{"id":"https://openalex.org/I4210163873","display_name":"Zhengzhou University of Industrial Technology","ror":"https://ror.org/05es8as59","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210163873"]},{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianwei Niu","raw_affiliation_strings":["Hangzhou Innovation Research Institute of Beihang University, Hangzhou, China","State Key Laboratory of Virtual Reality Technology and Systems, and Beijing Advanced Innovation Center for Big Data and Brain Computing, School of Computer Science and Engineering, Beihang University, Beijing, China","Zhengzhou University Research Institute of Industrial Technology, Zhengzhou University, Zhengzhou, China"],"affiliations":[{"raw_affiliation_string":"Hangzhou Innovation Research Institute of Beihang University, Hangzhou, China","institution_ids":["https://openalex.org/I82880672"]},{"raw_affiliation_string":"State Key Laboratory of Virtual Reality Technology and Systems, and Beijing Advanced Innovation Center for Big Data and Brain Computing, School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]},{"raw_affiliation_string":"Zhengzhou University Research Institute of Industrial Technology, Zhengzhou University, Zhengzhou, China","institution_ids":["https://openalex.org/I4210163873"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5078214911","display_name":"Shasha Mo","orcid":null},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shasha Mo","raw_affiliation_strings":["School of Cyber Science and Technology, Beihang University, Beijing, China","State Key Laboratory of Virtual Reality Technology and Systems, and Beijing Advanced Innovation Center for Big Data and Brain Computing, School of Computer Science and Engineering, Beihang University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Cyber Science and Technology, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]},{"raw_affiliation_string":"State Key Laboratory of Virtual Reality Technology and Systems, and Beijing Advanced Innovation Center for Big Data and Brain Computing, School of Computer Science and Engineering, Beihang University, Beijing, China","institution_ids":["https://openalex.org/I82880672"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5018090877"],"corresponding_institution_ids":["https://openalex.org/I82880672"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.10877246,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.982200026512146,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/perplexity","display_name":"Perplexity","score":0.7474579215049744},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7268046736717224},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.7164311408996582},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.6144355535507202},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.609644889831543},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5348094701766968},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.46765679121017456},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3748112916946411},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3466445505619049},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.15386736392974854},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.1145918071269989}],"concepts":[{"id":"https://openalex.org/C100279451","wikidata":"https://www.wikidata.org/wiki/Q372193","display_name":"Perplexity","level":3,"score":0.7474579215049744},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7268046736717224},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.7164311408996582},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.6144355535507202},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.609644889831543},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5348094701766968},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46765679121017456},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3748112916946411},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3466445505619049},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.15386736392974854},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.1145918071269989},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icc42927.2021.9500450","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icc42927.2021.9500450","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICC 2021 - IEEE International Conference on Communications","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.6499999761581421,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":68,"referenced_works":["https://openalex.org/W1815076433","https://openalex.org/W2064675550","https://openalex.org/W2157331557","https://openalex.org/W2194775991","https://openalex.org/W2519314406","https://openalex.org/W2567070169","https://openalex.org/W2608787653","https://openalex.org/W2757047188","https://openalex.org/W2798858969","https://openalex.org/W2896457183","https://openalex.org/W2896528354","https://openalex.org/W2915716523","https://openalex.org/W2951672049","https://openalex.org/W2955227499","https://openalex.org/W2956480774","https://openalex.org/W2962832505","https://openalex.org/W2963016848","https://openalex.org/W2963034893","https://openalex.org/W2963088785","https://openalex.org/W2963341956","https://openalex.org/W2963403868","https://openalex.org/W2963494889","https://openalex.org/W2963537482","https://openalex.org/W2963631907","https://openalex.org/W2963735467","https://openalex.org/W2963925437","https://openalex.org/W2963938518","https://openalex.org/W2963970792","https://openalex.org/W2963983719","https://openalex.org/W2964110616","https://openalex.org/W2984864519","https://openalex.org/W2991324852","https://openalex.org/W2994673210","https://openalex.org/W2994809010","https://openalex.org/W3007773043","https://openalex.org/W3034708491","https://openalex.org/W3099023595","https://openalex.org/W4287867419","https://openalex.org/W4288289156","https://openalex.org/W4294555862","https://openalex.org/W4295253143","https://openalex.org/W4295838474","https://openalex.org/W4298422451","https://openalex.org/W4299838440","https://openalex.org/W4385245566","https://openalex.org/W6638545294","https://openalex.org/W6720905350","https://openalex.org/W6725939724","https://openalex.org/W6726378182","https://openalex.org/W6727099177","https://openalex.org/W6731370813","https://openalex.org/W6731780175","https://openalex.org/W6739901393","https://openalex.org/W6742632731","https://openalex.org/W6744707562","https://openalex.org/W6745265922","https://openalex.org/W6751097180","https://openalex.org/W6754905691","https://openalex.org/W6755207826","https://openalex.org/W6755868333","https://openalex.org/W6765264507","https://openalex.org/W6765571568","https://openalex.org/W6769823692","https://openalex.org/W6770514768","https://openalex.org/W6771626834","https://openalex.org/W6773526639","https://openalex.org/W6774244609","https://openalex.org/W6780226713"],"related_works":["https://openalex.org/W2169518243","https://openalex.org/W2252095989","https://openalex.org/W2551914602","https://openalex.org/W4322096525","https://openalex.org/W2020757772","https://openalex.org/W4281893144","https://openalex.org/W2105076537","https://openalex.org/W3013624417","https://openalex.org/W4287826556","https://openalex.org/W3049463507"],"abstract_inverted_index":{"Language":[0,8],"modeling":[1,145],"is":[2,16,78,87,111],"an":[3,91],"important":[4],"problem":[5],"in":[6,33,75,113],"Natural":[7],"Processing":[9],"(NLP),":[10],"and":[11,21,81,94,120,157],"the":[12,18,42,56,70,82,126,130,148,153,167],"multi-layer":[13],"Transformer":[14,77],"network":[15,57],"currently":[17],"most":[19],"advanced":[20],"effective":[22],"model":[23],"for":[24],"this":[25,114],"task.":[26],"However,":[27],"there":[28],"exist":[29],"two":[30,103],"inherent":[31],"defects":[32],"its":[34],"multi-head":[35,68,131],"self-attention":[36,132],"structure:":[37],"(1)":[38],"attention":[39,44,61,122],"information":[40,62,123],"loss:":[41],"lower-level":[43,65],"weights":[45],"cannot":[46],"be":[47],"explicitly":[48],"passed":[49],"through":[50],"upper":[51],"layers,":[52],"which":[53,89],"may":[54],"lead":[55],"lose":[58],"some":[59],"pivotal":[60],"captured":[63],"by":[64],"layers;":[66],"(2)":[67],"bottleneck:":[69],"dimension":[71],"of":[72,84,129,150],"each":[73,85],"head":[74,86],"vanilla":[76],"relatively":[79],"small":[80],"process":[83,128],"independent,":[88],"introduces":[90],"expressive":[92],"bottleneck":[93],"makes":[95],"subspace":[96],"learning":[97],"inadequate":[98],"constitutionally.":[99],"To":[100],"overcome":[101],"these":[102],"weaknesses,":[104],"a":[105],"novel":[106],"neural":[107],"architecture":[108],"named":[109],"Guide-Transformer":[110,117,162],"proposed":[112],"paper.":[115],"The":[116,138],"utilizes":[118],"horizontal":[119],"vertical":[121],"to":[124],"guide":[125],"original":[127],"sublayer":[133],"without":[134],"introducing":[135],"excessive":[136],"complexity.":[137],"experimental":[139],"results":[140],"on":[141],"three":[142],"authoritative":[143],"language":[144],"benchmarks":[146],"demonstrate":[147],"effectiveness":[149],"Guide-Transformer.":[151],"For":[152],"popular":[154],"perplexity":[155],"(ppl)":[156],"bits-per-character":[158],"(bpc)":[159],"evaluation":[160],"metrics,":[161],"achieves":[163],"moderate":[164],"improvements":[165],"over":[166],"powerful":[168],"baseline":[169],"model.":[170]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
