{"id":"https://openalex.org/W7127165865","doi":"https://doi.org/10.48550/arxiv.2601.22851","title":"When Meanings Meet: Investigating the Emergence and Quality of Shared Concept Spaces during Multilingual Language Model Training","display_name":"When Meanings Meet: Investigating the Emergence and Quality of Shared Concept Spaces during Multilingual Language Model Training","publication_year":2026,"publication_date":"2026-01-30","ids":{"openalex":"https://openalex.org/W7127165865","doi":"https://doi.org/10.48550/arxiv.2601.22851"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2601.22851","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5115817676","display_name":"Felicia K\u00f6rner","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"K\u00f6rner, Felicia","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"M\u00fcller-Eberstein, Max","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"M\u00fcller-Eberstein, 
Max","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124851241","display_name":"Anna Korhonen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Korhonen, Anna","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5124835059","display_name":"Barbara Plank","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Plank, Barbara","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.2565000057220459,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.2565000057220459,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic 
Modeling","score":0.21660000085830688,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.08649999648332596,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.875},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.5782999992370605},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5126000046730042},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5103999972343445},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.4560999870300293},{"id":"https://openalex.org/keywords/causal-model","display_name":"Causal model","score":0.4465999901294708},{"id":"https://openalex.org/keywords/copying","display_name":"Copying","score":0.44429999589920044},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.43689998984336853}],"concepts":[{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.875},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer 
science","level":0,"score":0.5799999833106995},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.5782999992370605},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5126000046730042},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5103999972343445},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.4560999870300293},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.45210000872612},{"id":"https://openalex.org/C11671645","wikidata":"https://www.wikidata.org/wiki/Q5054567","display_name":"Causal model","level":2,"score":0.4465999901294708},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.446399986743927},{"id":"https://openalex.org/C2779151265","wikidata":"https://www.wikidata.org/wiki/Q1156791","display_name":"Copying","level":2,"score":0.44429999589920044},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4404999911785126},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation 
(biology)","level":4,"score":0.43689998984336853},{"id":"https://openalex.org/C2780035574","wikidata":"https://www.wikidata.org/wiki/Q30081","display_name":"Multilingualism","level":2,"score":0.41819998621940613},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.4050999879837036},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.38530001044273376},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.3804999887943268},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.3594000041484833},{"id":"https://openalex.org/C527412718","wikidata":"https://www.wikidata.org/wiki/Q855395","display_name":"Interpretation (philosophy)","level":2,"score":0.3111000061035156},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.3093999922275543},{"id":"https://openalex.org/C35512512","wikidata":"https://www.wikidata.org/wiki/Q501778","display_name":"Translation studies","level":2,"score":0.29109999537467957},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.26190000772476196},{"id":"https://openalex.org/C145912823","wikidata":"https://www.wikidata.org/wiki/Q113558","display_name":"Dynamics (music)","level":2,"score":0.2540000081062317}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2601.22851","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open 
MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2601.22851","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.22851","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2601.22851","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.827301561832428,"display_name":"Quality 
Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Training":[0],"Large":[1],"Language":[2],"Models":[3],"(LLMs)":[4],"with":[5,128],"high":[6],"multilingual":[7,27,198],"coverage":[8],"is":[9,130],"becoming":[10],"increasingly":[11],"important":[12],"--":[13,154,168],"especially":[14],"when":[15],"monolingual":[16],"resources":[17],"are":[18],"scarce.":[19],"Recent":[20],"studies":[21,43],"have":[22],"found":[23],"that":[24,115,126,143],"LLMs":[25],"process":[26],"inputs":[28],"in":[29,133,147,152,197],"shared":[30,116],"concept":[31,75,92,117],"spaces,":[32],"thought":[33],"to":[34,101,123,135],"support":[35],"generalization":[36],"and":[37,121,186],"cross-lingual":[38,91,166,184],"transfer.":[39],"However,":[40],"these":[41,64],"prior":[42,136],"often":[44],"do":[45],"not":[46],"use":[47],"causal":[48,83,191],"methods,":[49],"lack":[50],"deeper":[51],"error":[52],"analysis":[53,141],"or":[54,161],"focus":[55],"on":[56],"the":[57,71,82,111,180,187],"final":[58],"model":[59],"only,":[60],"leaving":[61],"open":[62],"how":[63,103],"spaces":[65,76,118],"emerge":[66,119],"during":[67,77],"training.":[68],"We":[69,89,113],"investigate":[70,102],"development":[72],"of":[73,79,86,110,164,183],"language-agnostic":[74],"pretraining":[78],"EuroLLM":[80],"through":[81],"interpretability":[84,192],"method":[85],"activation":[87],"patching.":[88],"isolate":[90],"representations,":[93],"then":[94],"inject":[95],"them":[96,129],"into":[97,179],"a":[98],"translation":[99,148,172],"prompt":[100],"consistently":[104],"translations":[105],"can":[106],"be":[107],"altered,":[108],"independently":[109],"language.":[112],"find":[114],"early":[120],"continue":[122],"refine,":[124],"but":[125],"alignment":[127,185],"language-dependent.":[131],"Furthermore,":[132],"contrast":[134],"work,":[137],"our":[138],"fine-grained":[139],"manual":[140]
,"reveals":[142],"some":[144],"apparent":[145],"gains":[146],"quality":[149],"reflect":[150],"shifts":[151],"behavior":[153],"like":[155],"selecting":[156],"senses":[157],"for":[158],"polysemous":[159],"words":[160],"translating":[162],"instead":[163],"copying":[165],"homographs":[167],"rather":[169],"than":[170],"improved":[171],"ability.":[173],"Our":[174],"findings":[175],"offer":[176,194],"new":[177],"insight":[178],"training":[181],"dynamics":[182],"conditions":[188],"under":[189],"which":[190],"methods":[193],"meaningful":[195],"insights":[196],"contexts.":[199]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-02-03T00:00:00"}
