{"id":"https://openalex.org/W4392090020","doi":"https://doi.org/10.48550/arxiv.2402.13703","title":"Investigating Multilingual Instruction-Tuning: Do Polyglot Models Demand for Multilingual Instructions?","display_name":"Investigating Multilingual Instruction-Tuning: Do Polyglot Models Demand for Multilingual Instructions?","publication_year":2024,"publication_date":"2024-02-21","ids":{"openalex":"https://openalex.org/W4392090020","doi":"https://doi.org/10.48550/arxiv.2402.13703"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2402.13703","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2402.13703","pdf_url":"https://arxiv.org/pdf/2402.13703","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2402.13703","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5093073363","display_name":"Alexander Arno Weber","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Weber, Alexander Arno","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045495929","display_name":"Klaudia Thellmann","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Thellmann, Klaudia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024837698","display_name":"Jan Ebert","orcid":"https://orcid.org/0000-0001-7118-0481"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ebert, Jan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071393532","display_name":"Nicolas Flores-Herr","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Flores-Herr, Nicolas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067133778","display_name":"Jens Lehmann","orcid":"https://orcid.org/0000-0001-9108-4278"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lehmann, Jens","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093073365","display_name":"Michael Fromm","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fromm, Michael","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5011914487","display_name":"Mehdi Ali","orcid":"https://orcid.org/0000-0003-1653-3920"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ali, Mehdi","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5093073363"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12542","display_name":"Second Language Learning and Teaching","score":0.9010000228881836,"subfield":{"id":"https://openalex.org/subfields/1208","display_name":"Literature and Literary Theory"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12542","display_name":"Second Language Learning and Teaching","score":0.9010000228881836,"subfield":{"id":"https://openalex.org/subfields/1208","display_name":"Literature and Literary Theory"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/polyglot","display_name":"Polyglot","score":0.9910485744476318},{"id":"https://openalex.org/keywords/multilingual-education","display_name":"Multilingual Education","score":0.6696842908859253},{"id":"https://openalex.org/keywords/multilingualism","display_name":"Multilingualism","score":0.614208459854126},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5570425391197205},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.32856374979019165},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.20612269639968872},{"id":"https://openalex.org/keywords/pedagogy","display_name":"Pedagogy","score":0.143884539604187},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.10382285714149475}],"concepts":[{"id":"https://openalex.org/C2780239667","wikidata":"https://www.wikidata.org/wiki/Q2102850","display_name":"Polyglot","level":2,"score":0.9910485744476318},{"id":"https://openalex.org/C2778035315","wikidata":"https://www.wikidata.org/wiki/Q17070152","display_name":"Multilingual Education","level":3,"score":0.6696842908859253},{"id":"https://openalex.org/C2780035574","wikidata":"https://www.wikidata.org/wiki/Q30081","display_name":"Multilingualism","level":2,"score":0.614208459854126},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5570425391197205},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.32856374979019165},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.20612269639968872},{"id":"https://openalex.org/C19417346","wikidata":"https://www.wikidata.org/wiki/Q7922","display_name":"Pedagogy","level":1,"score":0.143884539604187},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.10382285714149475},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2402.13703","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2402.13703","pdf_url":"https://arxiv.org/pdf/2402.13703","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2402.13703","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2402.13703","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2402.13703","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2402.13703","pdf_url":"https://arxiv.org/pdf/2402.13703","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"},{"id":"https://openalex.org/F4320321114","display_name":"Bundesministerium f\u00fcr Bildung und Forschung","ror":"https://ror.org/04pz7b180"},{"id":"https://openalex.org/F4320321613","display_name":"Technische Universit\u00e4t Dresden","ror":"https://ror.org/042aqky30"},{"id":"https://openalex.org/F4320331625","display_name":"Gauss Centre for Supercomputing","ror":"https://ror.org/0585fsj26"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4392090020.pdf","grobid_xml":"https://content.openalex.org/works/W4392090020.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4386048527","https://openalex.org/W2967426661","https://openalex.org/W4248762994","https://openalex.org/W2249462550","https://openalex.org/W2771184316","https://openalex.org/W1541230137","https://openalex.org/W137515628","https://openalex.org/W3081636694","https://openalex.org/W3167245123","https://openalex.org/W4385647947"],"abstract_inverted_index":{"The":[0],"adaption":[1],"of":[2,33,36,51,62,91],"multilingual":[3,37,74,119,147],"pre-trained":[4],"LLMs":[5,75],"into":[6],"eloquent":[7],"and":[8,64,71,143],"helpful":[9],"assistants":[10],"is":[11],"essential":[12],"to":[13,28,101,137],"facilitate":[14],"their":[15],"use":[16],"across":[17,48],"different":[18,41],"language":[19,42,63],"regions.":[20],"In":[21],"that":[22,86,106],"spirit,":[23],"we":[24,104,131],"are":[25],"the":[26,34,52,60,107,117,139],"first":[27],"conduct":[29,132],"an":[30],"extensive":[31],"study":[32,136],"performance":[35],"models":[38],"instruction-tuned":[39],"on":[40,44,68,79,88],"compositions":[43],"parallel":[45,80,89],"instruction-tuning":[46,77,81,87,128],"benchmarks":[47],"a":[49,69,72,124,133],"selection":[50],"most":[53],"spoken":[54],"Indo-European":[55],"languages.":[56],"We":[57],"systematically":[58],"examine":[59],"effects":[61],"instruction":[65,96],"dataset":[66],"size":[67],"mid-sized":[70],"large,":[73],"by":[76,99],"them":[78],"datasets.":[82,129],"Our":[83],"results":[84],"demonstrate":[85],"instead":[90],"monolingual":[92],"corpora":[93],"benefits":[94],"cross-lingual":[95],"following":[97],"capabilities":[98],"up":[100],"9.9%.":[102],"Furthermore,":[103],"show":[105],"Superficial":[108],"Alignment":[109],"Hypothesis":[110],"does":[111],"not":[112],"hold":[113],"in":[114],"general,":[115],"as":[116],"investigated":[118],"7B":[120],"parameter":[121],"model":[122],"presents":[123],"counter-example":[125],"requiring":[126],"large-scale":[127],"Finally,":[130],"human":[134],"annotation":[135],"understand":[138],"alignment":[140],"between":[141],"human-based":[142],"GPT-4-based":[144],"evaluation":[145],"within":[146],"chat":[148],"scenarios.":[149]},"counts_by_year":[],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
