{"id":"https://openalex.org/W4223508017","doi":"https://doi.org/10.1162/coli_a_00444","title":"Investigating Language Relationships in Multilingual Sentence Encoders Through the Lens of Linguistic Typology","display_name":"Investigating Language Relationships in Multilingual Sentence Encoders Through the Lens of Linguistic Typology","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4223508017","doi":"https://doi.org/10.1162/coli_a_00444"},"language":"en","primary_location":{"id":"doi:10.1162/coli_a_00444","is_oa":true,"landing_page_url":"https://doi.org/10.1162/coli_a_00444","pdf_url":"https://direct.mit.edu/coli/article-pdf/doi/10.1162/coli_a_00444/2034360/coli_a_00444.pdf","source":{"id":"https://openalex.org/S155526855","display_name":"Computational Linguistics","issn_l":"0891-2017","issn":["0891-2017","1530-9312"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computational Linguistics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://direct.mit.edu/coli/article-pdf/doi/10.1162/coli_a_00444/2034360/coli_a_00444.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109855717","display_name":"Rochelle Choenni","orcid":null},"institutions":[{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Rochelle Choenni","raw_affiliation_strings":["University of Amsterdam The Institute for Logic, Language and Computation (ILLC) r.m.v.k.choenni@uva.nl"],"affiliations":[{"raw_affiliation_string":"University of Amsterdam The Institute for Logic, Language and Computation (ILLC) r.m.v.k.choenni@uva.nl","institution_ids":["https://openalex.org/I887064364"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5016184654","display_name":"Ekaterina Shutova","orcid":"https://orcid.org/0009-0003-6664-4474"},"institutions":[{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Ekaterina Shutova","raw_affiliation_strings":["University of Amsterdam The Institute for Logic, Language and Computation (ILLC) e.shutova@uva.nl"],"affiliations":[{"raw_affiliation_string":"University of Amsterdam The Institute for Logic, Language and Computation (ILLC) e.shutova@uva.nl","institution_ids":["https://openalex.org/I887064364"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5016184654","https://openalex.org/A5109855717"],"corresponding_institution_ids":["https://openalex.org/I887064364"],"apc_list":null,"apc_paid":null,"fwci":1.2417,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.82417336,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":"48","issue":"3","first_page":"635","last_page":"672"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9879000186920166,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8009571433067322},{"id":"https://openalex.org/keywords/variation","display_name":"Variation (astronomy)","score":0.7764168977737427},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6042087078094482},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.5660375356674194},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.5461283326148987},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.5315843224525452},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.516377329826355},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5059065222740173},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.42147624492645264}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8009571433067322},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.7764168977737427},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6042087078094482},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.5660375356674194},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.5461283326148987},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.5315843224525452},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.516377329826355},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5059065222740173},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.42147624492645264},{"id":"https://openalex.org/C44870925","wikidata":"https://www.wikidata.org/wiki/Q37547","display_name":"Astrophysics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1162/coli_a_00444","is_oa":true,"landing_page_url":"https://doi.org/10.1162/coli_a_00444","pdf_url":"https://direct.mit.edu/coli/article-pdf/doi/10.1162/coli_a_00444/2034360/coli_a_00444.pdf","source":{"id":"https://openalex.org/S155526855","display_name":"Computational Linguistics","issn_l":"0891-2017","issn":["0891-2017","1530-9312"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computational Linguistics","raw_type":"journal-article"},{"id":"pmh:oai:dare.uva.nl:openaire/26277b97-acb6-43d9-81b2-85d67b12d20c","is_oa":false,"landing_page_url":"https://handle.uba.uva.nl/personal/pure/en/publications/investigating-language-relationships-in-multilingual-sentence-encoders-through-the-lens-of-linguistic-typology(26277b97-acb6-43d9-81b2-85d67b12d20c).html","pdf_url":null,"source":{"id":"https://openalex.org/S4306400088","display_name":"UvA-DARE (University of Amsterdam)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I887064364","host_organization_name":"University of Amsterdam","host_organization_lineage":["https://openalex.org/I887064364"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Choenni, R & Shutova, E 2022, 'Investigating language relationships in multilingual sentence encoders through the lens of linguistic typology', Computational Linguistics, vol. 48, no. 3, pp. 635\u2013672. https://doi.org/10.1162/coli_a_00444","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:doaj.org/article:2b8cb6e381104f02940f817654711dae","is_oa":true,"landing_page_url":"https://doaj.org/article/2b8cb6e381104f02940f817654711dae","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Computational Linguistics, Vol 48, Iss 3 (2022)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1162/coli_a_00444","is_oa":true,"landing_page_url":"https://doi.org/10.1162/coli_a_00444","pdf_url":"https://direct.mit.edu/coli/article-pdf/doi/10.1162/coli_a_00444/2034360/coli_a_00444.pdf","source":{"id":"https://openalex.org/S155526855","display_name":"Computational Linguistics","issn_l":"0891-2017","issn":["0891-2017","1530-9312"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computational Linguistics","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.8199999928474426}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4223508017.pdf","grobid_xml":"https://content.openalex.org/works/W4223508017.grobid-xml"},"referenced_works_count":80,"referenced_works":["https://openalex.org/W650948056","https://openalex.org/W803770162","https://openalex.org/W1504725289","https://openalex.org/W1522301498","https://openalex.org/W2016630033","https://openalex.org/W2081795963","https://openalex.org/W2128278152","https://openalex.org/W2137807925","https://openalex.org/W2143954309","https://openalex.org/W2151942821","https://openalex.org/W2156064360","https://openalex.org/W2250539671","https://openalex.org/W2251324968","https://openalex.org/W2251904602","https://openalex.org/W2270364989","https://openalex.org/W2270593706","https://openalex.org/W2471692228","https://openalex.org/W2511550932","https://openalex.org/W2531638282","https://openalex.org/W2549835527","https://openalex.org/W2747329762","https://openalex.org/W2799266483","https://openalex.org/W2810095012","https://openalex.org/W2887428522","https://openalex.org/W2888329843","https://openalex.org/W2888536529","https://openalex.org/W2890225082","https://openalex.org/W2891555348","https://openalex.org/W2911267749","https://openalex.org/W2911681509","https://openalex.org/W2914120296","https://openalex.org/W2917458986","https://openalex.org/W2946417913","https://openalex.org/W2952638691","https://openalex.org/W2962739339","https://openalex.org/W2963352326","https://openalex.org/W2963651521","https://openalex.org/W2964084097","https://openalex.org/W2964204621","https://openalex.org/W2965373594","https://openalex.org/W2970854433","https://openalex.org/W2971207485","https://openalex.org/W2971344868","https://openalex.org/W2973088264","https://openalex.org/W2978223337","https://openalex.org/W2985620815","https://openalex.org/W2988257285","https://openalex.org/W2998272617","https://openalex.org/W3009095382","https://openalex.org/W3013563411","https://openalex.org/W3017290615","https://openalex.org/W3035137491","https://openalex.org/W3035390927","https://openalex.org/W3100198908","https://openalex.org/W3103490574","https://openalex.org/W3105005398","https://openalex.org/W3105788222","https://openalex.org/W3174985167","https://openalex.org/W4298419326","https://openalex.org/W6631190155","https://openalex.org/W6637833912","https://openalex.org/W6676932571","https://openalex.org/W6677773616","https://openalex.org/W6680319291","https://openalex.org/W6681367800","https://openalex.org/W6688754573","https://openalex.org/W6691772385","https://openalex.org/W6694001839","https://openalex.org/W6731871716","https://openalex.org/W6732580787","https://openalex.org/W6740926173","https://openalex.org/W6755207826","https://openalex.org/W6757635932","https://openalex.org/W6758109434","https://openalex.org/W6759455113","https://openalex.org/W6766673545","https://openalex.org/W6774114028","https://openalex.org/W6784050413","https://openalex.org/W6996813427","https://openalex.org/W6999611261"],"related_works":["https://openalex.org/W2372020181","https://openalex.org/W2156531654","https://openalex.org/W4378714697","https://openalex.org/W1581723585","https://openalex.org/W2294330161","https://openalex.org/W2804553224","https://openalex.org/W4283822356","https://openalex.org/W1950940422","https://openalex.org/W2129146436","https://openalex.org/W1510159504"],"abstract_inverted_index":{"Abstract":[0],"Multilingual":[1],"sentence":[2,95],"encoders":[3,96],"have":[4],"seen":[5],"much":[6],"success":[7,17],"in":[8,138,164,192],"cross-lingual":[9,33],"model":[10],"transfer":[11,20],"for":[12,88],"downstream":[13],"NLP":[14],"tasks.":[15],"The":[16,171],"of":[18,32,45,52,72,107,130,160],"this":[19,60],"is,":[21],"however,":[22],"dependent":[23],"on":[24],"the":[25,30,43,49,56,70,131],"model\u2019s":[26],"ability":[27],"to":[28,104,111,154],"encode":[29],"patterns":[31,51],"similarity":[34],"and":[35,77,80,100,114,169,179],"variation.":[36],"Yet,":[37],"we":[38,62,118,149],"know":[39],"relatively":[40],"little":[41],"about":[42,123],"properties":[44,109,159,184],"individual":[46],"languages":[47,124,161,191],"or":[48],"general":[50],"linguistic":[53,73,140,183],"variation":[54,82,141],"that":[55,181],"models":[57],"encode.":[58],"In":[59,147],"article,":[61],"investigate":[63,119],"these":[64,182,193],"questions":[65],"by":[66],"leveraging":[67],"knowledge":[68],"from":[69],"field":[71],"typology,":[74],"which":[75],"studies":[76],"documents":[78],"structural":[79],"semantic":[81],"across":[83,127,188],"languages.":[84],"We":[85],"propose":[86,150],"methods":[87],"separating":[89],"language-specific":[90],"subspaces":[91],"within":[92],"state-of-the-art":[93,166],"multilingual":[94,167],"(LASER,":[97],"M-BERT,":[98],"XLM,":[99],"XLM-R)":[101],"with":[102,143],"respect":[103],"a":[105,151],"range":[106],"typological":[108,121,158],"pertaining":[110],"lexical,":[112],"morphological,":[113],"syntactic":[115],"structure.":[116],"Moreover,":[117],"how":[120,156],"information":[122],"is":[125],"distributed":[126],"all":[128],"layers":[129],"models.":[132,194],"Our":[133],"results":[134,172],"show":[135],"interesting":[136],"differences":[137],"encoding":[139],"associated":[142],"different":[144],"pretraining":[145],"strategies.":[146],"addition,":[148],"simple":[152],"method":[153],"study":[155],"shared":[157],"are":[162,185],"encoded":[163,186],"two":[165],"models\u2014M-BERT":[168],"XLM-R.":[170],"provide":[173],"insight":[174],"into":[175],"their":[176],"information-sharing":[177],"mechanisms":[178],"suggest":[180],"jointly":[187],"typologically":[189],"similar":[190]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2023,"cited_by_count":9}],"updated_date":"2026-03-16T09:10:04.655348","created_date":"2025-10-10T00:00:00"}
