{"id":"https://openalex.org/W7137814923","doi":"https://doi.org/10.1609/aaai.v40i38.40492","title":"Focusing on Language: Revealing and Exploiting Language Attention Heads in Multilingual Large Language Models","display_name":"Focusing on Language: Revealing and Exploiting Language Attention Heads in Multilingual Large Language Models","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7137814923","doi":"https://doi.org/10.1609/aaai.v40i38.40492"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i38.40492","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i38.40492","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v40i38.40492","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129663288","display_name":"Xin Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Xin Liu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001892361","display_name":"Qiyang Song","orcid":"https://orcid.org/0000-0002-1596-3331"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qiyang Song","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129694492","display_name":"Qihang Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qihang Zhou","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007408376","display_name":"Haichao Du","orcid":"https://orcid.org/0000-0003-2783-3232"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Haichao Du","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101656965","display_name":"Shaowen Xu","orcid":"https://orcid.org/0009-0007-8141-8183"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shaowen Xu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129746407","display_name":"Wenbo Jiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wenbo Jiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026497909","display_name":"Weng Zhang","orcid":"https://orcid.org/0009-0005-2612-3601"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Weijuan Zhang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5015456692","display_name":"Xiaoqi Jia","orcid":"https://orcid.org/0000-0002-8376-3235"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiaoqi Jia","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5129663288"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.09677419,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"38","first_page":"32195","last_page":"32203"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.14990000426769257,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.14990000426769257,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.14569999277591705,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.10589999705553055,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.8429999947547913},{"id":"https://openalex.org/keywords/multilingualism","display_name":"Multilingualism","score":0.5649999976158142},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.5586000084877014},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.5565999746322632},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4564000070095062},{"id":"https://openalex.org/keywords/language-understanding","display_name":"Language understanding","score":0.3944999873638153},{"id":"https://openalex.org/keywords/work","display_name":"Work (physics)","score":0.34139999747276306}],"concepts":[{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.8429999947547913},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7853000164031982},{"id":"https://openalex.org/C2780035574","wikidata":"https://www.wikidata.org/wiki/Q30081","display_name":"Multilingualism","level":2,"score":0.5649999976158142},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.5586000084877014},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.5565999746322632},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4830999970436096},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4564000070095062},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4564000070095062},{"id":"https://openalex.org/C2983448237","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Language understanding","level":2,"score":0.3944999873638153},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.34139999747276306},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3319000005722046},{"id":"https://openalex.org/C2780312720","wikidata":"https://www.wikidata.org/wiki/Q5689100","display_name":"Head (geology)","level":2,"score":0.3122999966144562},{"id":"https://openalex.org/C94922259","wikidata":"https://www.wikidata.org/wiki/Q33215","display_name":"Constructed language","level":2,"score":0.3061000108718872},{"id":"https://openalex.org/C171041071","wikidata":"https://www.wikidata.org/wiki/Q36870","display_name":"First language","level":2,"score":0.3009999990463257},{"id":"https://openalex.org/C83479923","wikidata":"https://www.wikidata.org/wiki/Q2063748","display_name":"Universal Networking Language","level":4,"score":0.2842999994754791},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.2630000114440918},{"id":"https://openalex.org/C49876356","wikidata":"https://www.wikidata.org/wiki/Q7002651","display_name":"Neuroscience of multilingualism","level":2,"score":0.26109999418258667},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.2540999948978424}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i38.40492","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i38.40492","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i38.40492","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i38.40492","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.8086822628974915,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1,117,122,149],"models":[2],"(LLMs)":[3],"increasingly":[4],"support":[5],"multilingual":[6,23,38,53,76,130,168],"understanding":[7],"and":[8,67,82,93,102,119,167],"generation.":[9],"Meanwhile,":[10],"efforts":[11],"to":[12,21,90,111,126,144,156],"interpret":[13],"their":[14],"internal":[15],"mechanisms":[16],"have":[17],"emerged,":[18],"offering":[19],"insights":[20],"enhance":[22],"performance.":[24],"While":[25],"multi-head":[26],"self-attention":[27],"(MHA)":[28],"has":[29],"proven":[30],"critical":[31],"in":[32,37,51,55,129],"many":[33],"areas,":[34],"its":[35],"role":[36],"capabilities":[39,77,169],"remains":[40],"underexplored.":[41],"In":[42],"this":[43],"work,":[44],"we":[45,95],"study":[46],"the":[47,86,97,113,165,173],"contribution":[48],"of":[49,99,170,175],"MHA":[50],"supporting":[52],"processing":[54],"LLMs.":[56,131],"We":[57,132],"propose":[58],"Language":[59],"Attention":[60],"Head":[61],"Importance":[62],"Scores":[63],"(LAHIS),":[64],"an":[65],"effective":[66],"efficient":[68],"method":[69],"that":[70,138],"identifies":[71],"attention":[72,109,146],"head":[73,142],"importance":[74],"for":[75],"via":[78],"a":[79,135,140],"single":[80],"forward":[81],"backward":[83],"pass":[84],"through":[85],"LLM.":[87],"Applying":[88],"LAHIS":[89],"Aya-23-8B,":[91],"Llama-3.2-3B,":[92],"Mistral-7B-v0.1,":[94],"reveal":[96],"existence":[98],"both":[100,164],"language-specific":[101],"language-general":[103],"heads.":[104],"Language-specific":[105],"heads":[106],"enable":[107],"cross-lingual":[108],"transfer":[110],"guide":[112],"model":[114],"toward":[115],"target":[116],"contexts":[118],"mitigate":[120],"off-target":[121],"generation":[123],"issue,":[124],"contributing":[125],"addressing":[127],"challenges":[128],"also":[133],"introduce":[134],"lightweight":[136],"adaptation":[137],"learns":[139],"soft":[141],"mask":[143],"modulate":[145],"outputs":[147],"over":[148],"heads,":[150],"requiring":[151],"only":[152],"20":[153],"tunable":[154],"parameters":[155],"improve":[157],"XQuAD":[158],"accuracy.":[159],"Overall,":[160],"our":[161],"work":[162],"enhances":[163],"interpretability":[166],"LLMs":[171],"from":[172],"perspective":[174],"MHA.":[176]},"counts_by_year":[],"updated_date":"2026-03-18T06:31:55.123368","created_date":"2026-03-18T00:00:00"}
