{"id":"https://openalex.org/W2952868992","doi":"https://doi.org/10.18653/v1/p19-1384","title":"On the Distribution of Deep Clausal Embeddings: A Large Cross-linguistic Study","display_name":"On the Distribution of Deep Clausal Embeddings: A Large Cross-linguistic Study","publication_year":2019,"publication_date":"2019-01-01","ids":{"openalex":"https://openalex.org/W2952868992","doi":"https://doi.org/10.18653/v1/p19-1384","mag":"2952868992"},"language":"en","primary_location":{"id":"doi:10.18653/v1/p19-1384","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/p19-1384","pdf_url":"https://www.aclweb.org/anthology/P19-1384.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/P19-1384.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5070685856","display_name":"Dami\u00e1n E. Blas\u00ed","orcid":"https://orcid.org/0000-0002-9885-1414"},"institutions":[{"id":"https://openalex.org/I4210160367","display_name":"University Psychiatric Hospital","ror":"https://ror.org/05reesp83","country_code":"SI","type":"healthcare","lineage":["https://openalex.org/I4210160367"]},{"id":"https://openalex.org/I202697423","display_name":"University of Zurich","ror":"https://ror.org/02crff812","country_code":"CH","type":"education","lineage":["https://openalex.org/I202697423"]},{"id":"https://openalex.org/I4210161060","display_name":"Max Planck Institute for the Science of Human History","ror":"https://ror.org/05mjrzy91","country_code":"DE","type":"facility","lineage":["https://openalex.org/I149899117","https://openalex.org/I4210161060"]}],"countries":["CH","DE","SI"],"is_corresponding":true,"raw_author_name":"Damian Blasi","raw_affiliation_strings":[": Max Planck Institute for the Science of Human History",": University of Zrich",": University of Z\u00fcrich"],"affiliations":[{"raw_affiliation_string":": Max Planck Institute for the Science of Human History","institution_ids":["https://openalex.org/I4210161060"]},{"raw_affiliation_string":": University of Zrich","institution_ids":["https://openalex.org/I4210160367"]},{"raw_affiliation_string":": University of Z\u00fcrich","institution_ids":["https://openalex.org/I202697423"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061951606","display_name":"Ryan Cotterell","orcid":"https://orcid.org/0000-0003-4080-1833"},"institutions":[{"id":"https://openalex.org/I4210096386","display_name":"Bridge University","ror":"https://ror.org/00cbm0437","country_code":"SS","type":"education","lineage":["https://openalex.org/I4210096386"]},{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB","SS"],"is_corresponding":false,"raw_author_name":"Ryan Cotterell","raw_affiliation_strings":[": Cambridge University"],"affiliations":[{"raw_affiliation_string":": Cambridge University","institution_ids":["https://openalex.org/I4210096386","https://openalex.org/I241749"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071754582","display_name":"Lawrence Wolf-Sonkin","orcid":null},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lawrence Wolf-Sonkin","raw_affiliation_strings":[": Johns Hopkins University"],"affiliations":[{"raw_affiliation_string":": Johns Hopkins University","institution_ids":["https://openalex.org/I145311948"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014226773","display_name":"Sabine Stoll","orcid":"https://orcid.org/0000-0001-6328-5654"},"institutions":[{"id":"https://openalex.org/I202697423","display_name":"University of Zurich","ror":"https://ror.org/02crff812","country_code":"CH","type":"education","lineage":["https://openalex.org/I202697423"]},{"id":"https://openalex.org/I4210160367","display_name":"University Psychiatric Hospital","ror":"https://ror.org/05reesp83","country_code":"SI","type":"healthcare","lineage":["https://openalex.org/I4210160367"]}],"countries":["CH","SI"],"is_corresponding":false,"raw_author_name":"Sabine Stoll","raw_affiliation_strings":[": University of Zrich",": University of Z\u00fcrich"],"affiliations":[{"raw_affiliation_string":": University of Zrich","institution_ids":["https://openalex.org/I4210160367"]},{"raw_affiliation_string":": University of Z\u00fcrich","institution_ids":["https://openalex.org/I202697423"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054053101","display_name":"Balthasar Bickel","orcid":"https://orcid.org/0000-0002-9087-0565"},"institutions":[{"id":"https://openalex.org/I4210160367","display_name":"University Psychiatric Hospital","ror":"https://ror.org/05reesp83","country_code":"SI","type":"healthcare","lineage":["https://openalex.org/I4210160367"]},{"id":"https://openalex.org/I202697423","display_name":"University of Zurich","ror":"https://ror.org/02crff812","country_code":"CH","type":"education","lineage":["https://openalex.org/I202697423"]}],"countries":["CH","SI"],"is_corresponding":false,"raw_author_name":"Balthasar Bickel","raw_affiliation_strings":[": University of Zrich",": University of Z\u00fcrich"],"affiliations":[{"raw_affiliation_string":": University of Zrich","institution_ids":["https://openalex.org/I4210160367"]},{"raw_affiliation_string":": University of Z\u00fcrich","institution_ids":["https://openalex.org/I202697423"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038612405","display_name":"Marco Baroni","orcid":"https://orcid.org/0000-0001-5066-3580"},"institutions":[{"id":"https://openalex.org/I11932220","display_name":"Instituci\u00f3 Catalana de Recerca i Estudis Avan\u00e7ats","ror":"https://ror.org/0371hy230","country_code":"ES","type":"nonprofit","lineage":["https://openalex.org/I11932220"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Marco Baroni","raw_affiliation_strings":["Catalan Institution for Research and Advanced Studies"],"affiliations":[{"raw_affiliation_string":"Catalan Institution for Research and Advanced Studies","institution_ids":["https://openalex.org/I11932220"]}]}],"institutions":[],"countries_distinct_count":7,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5070685856"],"corresponding_institution_ids":["https://openalex.org/I202697423","https://openalex.org/I4210160367","https://openalex.org/I4210161060"],"apc_list":null,"apc_paid":null,"fwci":0.5779,"has_fulltext":true,"cited_by_count":6,"citation_normalized_percentile":{"value":0.75386101,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"3938","last_page":"3943"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12090","display_name":"Language and cultural evolution","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/3316","display_name":"Cultural Studies"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7669119834899902},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.7535410523414612},{"id":"https://openalex.org/keywords/dependency","display_name":"Dependency (UML)","score":0.6720118522644043},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.6460173726081848},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.623993992805481},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6223006248474121},{"id":"https://openalex.org/keywords/dependency-grammar","display_name":"Dependency grammar","score":0.5895051956176758},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.4506533145904541},{"id":"https://openalex.org/keywords/word-embedding","display_name":"Word embedding","score":0.4208822250366211},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4181492328643799}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7669119834899902},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.7535410523414612},{"id":"https://openalex.org/C19768560","wikidata":"https://www.wikidata.org/wiki/Q320727","display_name":"Dependency (UML)","level":2,"score":0.6720118522644043},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.6460173726081848},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.623993992805481},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6223006248474121},{"id":"https://openalex.org/C164883195","wikidata":"https://www.wikidata.org/wiki/Q674834","display_name":"Dependency grammar","level":3,"score":0.5895051956176758},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.4506533145904541},{"id":"https://openalex.org/C2777462759","wikidata":"https://www.wikidata.org/wiki/Q18395344","display_name":"Word embedding","level":3,"score":0.4208822250366211},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4181492328643799},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":6,"locations":[{"id":"doi:10.18653/v1/p19-1384","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/p19-1384","pdf_url":"https://www.aclweb.org/anthology/P19-1384.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics","raw_type":"proceedings-article"},{"id":"pmh:oai:repositori-api.upf.edu:10230/45963","is_oa":true,"landing_page_url":"http://hdl.handle.net/10230/45963","pdf_url":null,"source":{"id":"https://openalex.org/S4306402615","display_name":"Repositori digital de la UPF (Universitat Pompeu Fabra)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I170486558","host_organization_name":"Universitat Pompeu Fabra","host_organization_lineage":["https://openalex.org/I170486558"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:www.zora.uzh.ch:175234","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4306401281","display_name":"Zurich Open Repository and Archive (University of Zurich)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I202697423","host_organization_name":"University of Zurich","host_organization_lineage":["https://openalex.org/I202697423"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Blasi, Damian; Cotterell, Ryan; Wolf-Sonkin, Lawrence; Stoll, Sabine; Bickel, Balthasar; Baroni, Marco  (2019). On the Distribution of Deep Clausal Embeddings: A Large Cross-linguistic Study. In: Korhonen, Anna; Traum, David; M\u00e0rquez, Llu\u00eds. Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics. Florence: Association for Computational Linguistics, 3938-3943.","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:pure.mpg.de:item_3149942","is_oa":false,"landing_page_url":"http://hdl.handle.net/21.11116/0000-0004-628D-F","pdf_url":null,"source":{"id":"https://openalex.org/S4306400654","display_name":"MPG.PuRe (Max Planck Society)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I149899117","host_organization_name":"Max Planck Society","host_organization_lineage":["https://openalex.org/I149899117"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics","raw_type":"info:eu-repo/semantics/conferenceObject"},{"id":"pmh:oai:pure.mpg.de:item_3258944","is_oa":false,"landing_page_url":"http://hdl.handle.net/21.11116/0000-0007-8EBF-3","pdf_url":null,"source":{"id":"https://openalex.org/S4306400654","display_name":"MPG.PuRe (Max Planck Society)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I149899117","host_organization_name":"Max Planck Society","host_organization_lineage":["https://openalex.org/I149899117"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics","raw_type":"info:eu-repo/semantics/conferenceObject"},{"id":"doi:10.5167/uzh-175234","is_oa":true,"landing_page_url":"https://doi.org/10.5167/uzh-175234","pdf_url":null,"source":{"id":"https://openalex.org/S7407051291","display_name":"Universit\u00e4t Z\u00fcrich, ZORA","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"chapter"}],"best_oa_location":{"id":"doi:10.18653/v1/p19-1384","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/p19-1384","pdf_url":"https://www.aclweb.org/anthology/P19-1384.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.8299999833106995}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2952868992.pdf","grobid_xml":"https://content.openalex.org/works/W2952868992.grobid-xml"},"referenced_works_count":26,"referenced_works":["https://openalex.org/W570794338","https://openalex.org/W1500193582","https://openalex.org/W1504970960","https://openalex.org/W1515002134","https://openalex.org/W1980187498","https://openalex.org/W1992904188","https://openalex.org/W1996672843","https://openalex.org/W1999470429","https://openalex.org/W2000196122","https://openalex.org/W2079656678","https://openalex.org/W2109592461","https://openalex.org/W2123230150","https://openalex.org/W2296582821","https://openalex.org/W2360119138","https://openalex.org/W2493916176","https://openalex.org/W2500088340","https://openalex.org/W2574640638","https://openalex.org/W2739827909","https://openalex.org/W2776319379","https://openalex.org/W2903490366","https://openalex.org/W2915774325","https://openalex.org/W2918045979","https://openalex.org/W3103362336","https://openalex.org/W4211208542","https://openalex.org/W4235107097","https://openalex.org/W4248343126"],"related_works":["https://openalex.org/W2251084681","https://openalex.org/W2098784136","https://openalex.org/W287510790","https://openalex.org/W3117798239","https://openalex.org/W4241489294","https://openalex.org/W63925617","https://openalex.org/W2968543375","https://openalex.org/W4288558800","https://openalex.org/W2888625260","https://openalex.org/W2953770453"],"abstract_inverted_index":{"Embedding":[0],"a":[1,16,25,35,81,151],"clause":[2],"inside":[3],"another":[4],"(\"the":[5],"girl":[6],"[who":[7],"likes":[8],"cars":[9],"[that":[10],"run":[11],"fast]]":[12],"has":[13,20,63],"arrived\")":[14],"is":[15,114,128],"fundamental":[17,39,189],"resource":[18],"that":[19,91,112,162],"been":[21,65],"argued":[22],"to":[23,98,136],"be":[24,137],"key":[26],"driver":[27],"of":[28,61,74,83,125],"linguistic":[29],"expressiveness.":[30],"As":[31],"such,":[32],"it":[33],"plays":[34],"central":[36],"role":[37],"in":[38,88,168,191],"debates":[40],"on":[41,55,67,120,188],"what":[42],"makes":[43],"human":[44],"language":[45],"unique,":[46],"and":[47,58,105,178],"how":[48,176],"they":[49],"might":[50],"have":[51],"evolved.":[52],"Empirical":[53],"evidence":[54,116],"the":[56,59,95,123,159],"prevalence":[57],"limits":[60],"embeddings":[62,147,164],"however":[64],"based":[66],"either":[68],"laboratory":[69],"setups":[70],"or":[71],"corpus":[72],"data":[73],"relatively":[75],"limited":[76],"size.":[77],"We":[78],"introduce":[79],"here":[80],"collection":[82],"large,":[84],"dependencyparsed":[85],"written":[86,169],"corpora":[87],"17":[89],"languages,":[90],"allow":[92],"us,":[93],"for":[94,117],"first":[96],"time,":[97],"capture":[99],"clausal":[100],"embedding":[101,121],"through":[102],"dependency":[103],"graphs":[104],"assess":[106],"their":[107],"distribution.":[108],"Our":[109],"results":[110,160],"indicate":[111],"there":[113],"no":[115],"hard":[118],"constraints":[119],"depth:":[122],"tail":[124],"depth":[126],"distributions":[127],"heavy.":[129],"Moreover,":[130],"although":[131],"deeply":[132],"embedded":[133],"clauses":[134],"tend":[135],"shorter,":[138],"suggesting":[139],"processing":[140],"load":[141],"issues,":[142],"complex":[143],"sentences":[144],"with":[145],"many":[146],"do":[148],"not":[149,166],"display":[150],"bias":[152],"towards":[153],"less":[154],"deep":[155,163],"embeddings.":[156],"Taken":[157],"together,":[158],"suggest":[161],"are":[165],"disfavored":[167],"language.":[170],"More":[171],"generally,":[172],"our":[173],"study":[174],"illustrates":[175],"resources":[177],"methods":[179],"from":[180],"latest-generation":[181],"big-data":[182],"NLP":[183],"can":[184],"provide":[185],"new":[186],"perspectives":[187],"questions":[190],"theoretical":[192],"linguistics.":[193]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
