{"id":"https://openalex.org/W2962785940","doi":"https://doi.org/10.18653/v1/w16-1610","title":"Quantifying the Vanishing Gradient and Long Distance Dependency Problem in Recursive Neural Networks and Recursive LSTMs","display_name":"Quantifying the Vanishing Gradient and Long Distance Dependency Problem in Recursive Neural Networks and Recursive LSTMs","publication_year":2016,"publication_date":"2016-01-01","ids":{"openalex":"https://openalex.org/W2962785940","doi":"https://doi.org/10.18653/v1/w16-1610","mag":"2962785940"},"language":"en","primary_location":{"id":"doi:10.18653/v1/w16-1610","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w16-1610","pdf_url":"https://www.aclweb.org/anthology/W16-1610.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 1st Workshop on Representation Learning for NLP","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/W16-1610.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5061504918","display_name":"Phong Ba Le","orcid":null},"institutions":[{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Phong Le","raw_affiliation_strings":["Institute for Logic , Language and Computation University of Amsterdam , the Netherlands","Institute for Logic, Language and Computation University of Amsterdam, the Netherlands"],"affiliations":[{"raw_affiliation_string":"Institute for Logic , Language and Computation University of Amsterdam , the Netherlands","institution_ids":["https://openalex.org/I887064364"]},{"raw_affiliation_string":"Institute for Logic, Language and Computation University of Amsterdam, the Netherlands","institution_ids":["https://openalex.org/I887064364"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5007928903","display_name":"Willem Zuidema","orcid":"https://orcid.org/0000-0002-2362-5447"},"institutions":[{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Willem Zuidema","raw_affiliation_strings":["Institute for Logic , Language and Computation University of Amsterdam , the Netherlands","Institute for Logic, Language and Computation University of Amsterdam, the Netherlands"],"affiliations":[{"raw_affiliation_string":"Institute for Logic , Language and Computation University of Amsterdam , the Netherlands","institution_ids":["https://openalex.org/I887064364"]},{"raw_affiliation_string":"Institute for Logic, Language and Computation University of Amsterdam, the Netherlands","institution_ids":["https://openalex.org/I887064364"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5061504918"],"corresponding_institution_ids":["https://openalex.org/I887064364"],"apc_list":null,"apc_paid":null,"fwci":1.7669,"has_fulltext":true,"cited_by_count":61,"citation_normalized_percentile":{"value":0.90253422,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"87","last_page":"93"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.8352223634719849},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8034235239028931},{"id":"https://openalex.org/keywords/dependency","display_name":"Dependency (UML)","score":0.7015978693962097},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5974287390708923},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.5409219861030579},{"id":"https://openalex.org/keywords/dependency-grammar","display_name":"Dependency grammar","score":0.5275694131851196},{"id":"https://openalex.org/keywords/tree-structure","display_name":"Tree structure","score":0.524105429649353},{"id":"https://openalex.org/keywords/node","display_name":"Node (physics)","score":0.5230118632316589},{"id":"https://openalex.org/keywords/backpropagation","display_name":"Backpropagation","score":0.5050681233406067},{"id":"https://openalex.org/keywords/tree","display_name":"Tree (set theory)","score":0.4838206171989441},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.42234084010124207},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.41911500692367554},{"id":"https://openalex.org/keywords/extension","display_name":"Extension (predicate logic)","score":0.41448915004730225},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4131395220756531},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.41296935081481934},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3315100073814392},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3299618065357208},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11418160796165466}],"concepts":[{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.8352223634719849},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8034235239028931},{"id":"https://openalex.org/C19768560","wikidata":"https://www.wikidata.org/wiki/Q320727","display_name":"Dependency (UML)","level":2,"score":0.7015978693962097},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5974287390708923},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.5409219861030579},{"id":"https://openalex.org/C164883195","wikidata":"https://www.wikidata.org/wiki/Q674834","display_name":"Dependency grammar","level":3,"score":0.5275694131851196},{"id":"https://openalex.org/C163797641","wikidata":"https://www.wikidata.org/wiki/Q2067937","display_name":"Tree structure","level":3,"score":0.524105429649353},{"id":"https://openalex.org/C62611344","wikidata":"https://www.wikidata.org/wiki/Q1062658","display_name":"Node (physics)","level":2,"score":0.5230118632316589},{"id":"https://openalex.org/C155032097","wikidata":"https://www.wikidata.org/wiki/Q798503","display_name":"Backpropagation","level":3,"score":0.5050681233406067},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.4838206171989441},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.42234084010124207},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.41911500692367554},{"id":"https://openalex.org/C2778029271","wikidata":"https://www.wikidata.org/wiki/Q5421931","display_name":"Extension (predicate logic)","level":2,"score":0.41448915004730225},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4131395220756531},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.41296935081481934},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3315100073814392},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3299618065357208},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11418160796165466},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C197855036","wikidata":"https://www.wikidata.org/wiki/Q380172","display_name":"Binary tree","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.18653/v1/w16-1610","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w16-1610","pdf_url":"https://www.aclweb.org/anthology/W16-1610.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 1st Workshop on Representation Learning for NLP","raw_type":"proceedings-article"},{"id":"pmh:oai:dare.uva.nl:openaire_cris_publications/79fd9b72-f030-47d9-ae2e-326b147c55ff","is_oa":true,"landing_page_url":"https://handle.uba.uva.nl/personal/pure/en/publications/quantifying-the-vanishing-gradient-and-long-distance-dependency-problem-in-recursive-neural-networks-and-recursive-lstms(79fd9b72-f030-47d9-ae2e-326b147c55ff).html","pdf_url":"https://pure.uva.nl/ws/files/40405190/W16_1610.pdf","source":{"id":"https://openalex.org/S4306400088","display_name":"UvA-DARE (University of Amsterdam)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I887064364","host_organization_name":"University of Amsterdam","host_organization_lineage":["https://openalex.org/I887064364"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Le, P & Zuidema, W 2016, Quantifying the vanishing gradient and long distance dependency problem in recursive neural networks and recursive LSTMs. in P Blunsom, K Cho, S Cohen, E Grefenstette, K M Hermann, L Rimell, J Weston & S W Yih (eds), The 54th Annual Meeting of the Association for Computational Linguistics. Proceedings of the 1st Workshop on Representation Learning for NLP : ACL 2016 : August 11th, 2016, Berlin, Germany. Stroudsburg, PA, pp. 87-93, 1st Workshop on Representation Learning for NLP, Berlin, Berlin, Germany, 11/08/16. https://doi.org/10.18653/v1/W16-1610","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:uvapub:oai:dare.uva.nl:publications/79fd9b72-f030-47d9-ae2e-326b147c55ff","is_oa":true,"landing_page_url":"https://dare.uva.nl/personal/pure/en/publications/quantifying-the-vanishing-gradient-and-long-distance-dependency-problem-in-recursive-neural-networks-and-recursive-lstms(79fd9b72-f030-47d9-ae2e-326b147c55ff).html","pdf_url":"https://dare.uva.nl/personal/pure/en/publications/quantifying-the-vanishing-gradient-and-long-distance-dependency-problem-in-recursive-neural-networks-and-recursive-lstms(79fd9b72-f030-47d9-ae2e-326b147c55ff).html","source":{"id":"https://openalex.org/S4306401843","display_name":"Data Archiving and Networked Services (DANS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1322597698","host_organization_name":"Royal Netherlands Academy of Arts and Sciences","host_organization_lineage":["https://openalex.org/I1322597698"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"The 54th Annual Meeting of the Association for Computational Linguistics. Proceedings of the 1st Workshop on Representation Learning for NLP: ACL 2016 : August 11th, 2016, Berlin, Germany, 87 - 93","raw_type":"info:eu-repo/semantics/conferencepaper"},{"id":"pmh:oai:dare.uva.nl:publications/79fd9b72-f030-47d9-ae2e-326b147c55ff","is_oa":true,"landing_page_url":"https://hdl.handle.net/11245.1/79fd9b72-f030-47d9-ae2e-326b147c55ff","pdf_url":null,"source":{"id":"https://openalex.org/S4306400088","display_name":"UvA-DARE (University of Amsterdam)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I887064364","host_organization_name":"University of Amsterdam","host_organization_lineage":["https://openalex.org/I887064364"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Le, P & Zuidema, W 2016, Quantifying the vanishing gradient and long distance dependency problem in recursive neural networks and recursive LSTMs. in P Blunsom, K Cho, S Cohen, E Grefenstette, K M Hermann, L Rimell, J Weston & S W Yih (eds), The 54th Annual Meeting of the Association for Computational Linguistics. Proceedings of the 1st Workshop on Representation Learning for NLP : ACL 2016 : August 11th, 2016, Berlin, Germany. Stroudsburg, PA, pp. 87-93, 1st Workshop on Representation Learning for NLP, Berlin, Berlin, Germany, 11/08/16. https://doi.org/10.18653/v1/W16-1610","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"doi:10.18653/v1/w16-1610","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w16-1610","pdf_url":"https://www.aclweb.org/anthology/W16-1610.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 1st Workshop on Representation Learning for NLP","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2962785940.pdf","grobid_xml":"https://content.openalex.org/works/W2962785940.grobid-xml"},"referenced_works_count":11,"referenced_works":["https://openalex.org/W1879966306","https://openalex.org/W2064675550","https://openalex.org/W2107878631","https://openalex.org/W2121029939","https://openalex.org/W2133280805","https://openalex.org/W2146502635","https://openalex.org/W2250644439","https://openalex.org/W2251012068","https://openalex.org/W2251939518","https://openalex.org/W2960930698","https://openalex.org/W2963355447"],"related_works":["https://openalex.org/W2251084681","https://openalex.org/W287510790","https://openalex.org/W2098784136","https://openalex.org/W2130795788","https://openalex.org/W2968543375","https://openalex.org/W2571817549","https://openalex.org/W1541975828","https://openalex.org/W2159336305","https://openalex.org/W2987141700","https://openalex.org/W2953770453"],"abstract_inverted_index":{"Recursive":[0],"neural":[1],"networks":[2,14],"(RNN)":[3],"and":[4,62,67,106,145,154],"their":[5],"recently":[6],"proposed":[7],"extension":[8],"recursive":[9],"long":[10,63],"short":[11],"term":[12],"memory":[13],"(RLSTM)":[15],"are":[16,159],"models":[17,35],"that":[18,54,68,82,97,147],"compute":[19],"representations":[20],"for":[21,92,133],"sentences,":[22],"by":[23],"recursively":[24],"combining":[25],"word":[26],"embeddings":[27],"according":[28],"to":[29,85],"an":[30,78,131],"externally":[31],"provided":[32],"parse":[33],"tree.Both":[34],"thus,":[36],"unlike":[37],"recurrent":[38],"networks,":[39],"explicitly":[40],"make":[41],"use":[42],"of":[43,47,89,100,114,117,137,150,155],"the":[44,59,87,103,115,121,126,148],"hierarchical":[45,152],"structure":[46,153],"a":[48,98,107],"sentence.In":[49],"this":[50],"paper,":[51],"we":[52],"demonstrate":[53],"RNNs":[55],"nevertheless":[56],"suffer":[57],"from":[58],"vanishing":[60],"gradient":[61],"distance":[64],"dependency":[65],"problem,":[66],"RLSTMs":[69,138],"greatly":[70],"improve":[71],"over":[72],"RNN's":[73],"on":[74,139],"these":[75,90],"problems.We":[76],"present":[77],"artificial":[79],"learning":[80],"task":[81],"allows":[83],"us":[84],"quantify":[86],"severity":[88],"problems":[91],"both":[93],"models.We":[94],"further":[95],"show":[96],"ratio":[99],"gradients":[101],"(at":[102],"root":[104],"node":[105],"focal":[108],"leaf":[109],"node)":[110],"is":[111],"highly":[112],"indicative":[113],"success":[116],"backpropagation":[118],"at":[119],"optimizing":[120],"relevant":[122],"weights":[123],"low":[124],"in":[125],"tree.This":[127],"paper":[128],"thus":[129],"provides":[130],"explanation":[132],"existing,":[134],"superior":[135],"results":[136],"tasks":[140],"such":[141],"as":[142],"sentiment":[143],"analysis,":[144],"suggests":[146],"benefits":[149],"including":[151,156],"LSTM-style":[157],"gating":[158],"complementary.":[160]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":15},{"year":2024,"cited_by_count":10},{"year":2023,"cited_by_count":8},{"year":2022,"cited_by_count":8},{"year":2021,"cited_by_count":11},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}