{"id":"https://openalex.org/W4391282676","doi":"https://doi.org/10.1007/s10676-023-09742-6","title":"Diversity and language technology: how\u00a0language\u00a0modeling\u00a0bias causes\u00a0epistemic\u00a0injustice","display_name":"Diversity and language technology: how\u00a0language\u00a0modeling\u00a0bias causes\u00a0epistemic\u00a0injustice","publication_year":2024,"publication_date":"2024-01-27","ids":{"openalex":"https://openalex.org/W4391282676","doi":"https://doi.org/10.1007/s10676-023-09742-6"},"language":"en","primary_location":{"id":"doi:10.1007/s10676-023-09742-6","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10676-023-09742-6","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10676-023-09742-6.pdf","source":{"id":"https://openalex.org/S13096939","display_name":"Ethics and Information Technology","issn_l":"1388-1957","issn":["1388-1957","1572-8439"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Ethics and Information Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s10676-023-09742-6.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5057959142","display_name":"Paula Helm","orcid":null},"institutions":[{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":true,"raw_author_name":"Paula Helm","raw_affiliation_strings":["University of Amsterdam, Amsterdam, The Netherlands","UvA - University of Amsterdam [Amsterdam] = Universiteit van Amsterdam (Spui 21 1012 WX Amsterdam - Netherlands)"],"raw_orcid":"https://orcid.org/0000-0002-2719-9721","affiliations":[{"raw_affiliation_string":"University of Amsterdam, Amsterdam, The Netherlands","institution_ids":["https://openalex.org/I887064364"]},{"raw_affiliation_string":"UvA - University of Amsterdam [Amsterdam] = Universiteit van Amsterdam (Spui 21 1012 WX Amsterdam - Netherlands)","institution_ids":["https://openalex.org/I887064364"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075478426","display_name":"G\u00e1bor Bella","orcid":"https://orcid.org/0000-0002-3868-1740"},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I161929037","display_name":"Universit\u00e9 de Bretagne Occidentale","ror":"https://ror.org/01b8h3982","country_code":"FR","type":"education","lineage":["https://openalex.org/I161929037"]},{"id":"https://openalex.org/I4210123702","display_name":"Laboratoire des Sciences et Techniques de l\u2019Information de la Communication et de la Connaissance","ror":"https://ror.org/0266kfd37","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I180375564","https://openalex.org/I201181511","https://openalex.org/I205703379","https://openalex.org/I2802204017","https://openalex.org/I4210123702","https://openalex.org/I4210127572","https://openalex.org/I4210145102","https://openalex.org/I4210148559","https://openalex.org/I4210159245","https://openalex.org/I4405260085"]},{"id":"https://openalex.org/I4210127572","display_name":"IMT Atlantique","ror":"https://ror.org/030hj3061","country_code":"FR","type":"education","lineage":["https://openalex.org/I205703379","https://openalex.org/I4210127572"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"G\u00e1bor Bella","raw_affiliation_strings":["Lab-STICC CNRS UMR 628, IMT Atlantique, Brest, France","IMT Atlantique - DSD - D\u00e9partement de Science des Donn\u00e9es (IMT Atlantique - Campus de Brest - Technop\u00f4le Brest-Iroise CS 8381829238 BREST Cedex 3 - France)","Lab-STICC_DECIDE - Equipe DECIDE (France)"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Lab-STICC CNRS UMR 628, IMT Atlantique, Brest, France","institution_ids":["https://openalex.org/I1294671590","https://openalex.org/I4210127572","https://openalex.org/I161929037","https://openalex.org/I4210123702"]},{"raw_affiliation_string":"IMT Atlantique - DSD - D\u00e9partement de Science des Donn\u00e9es (IMT Atlantique - Campus de Brest - Technop\u00f4le Brest-Iroise CS 8381829238 BREST Cedex 3 - France)","institution_ids":["https://openalex.org/I4210127572"]},{"raw_affiliation_string":"Lab-STICC_DECIDE - Equipe DECIDE (France)","institution_ids":["https://openalex.org/I161929037"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074914419","display_name":"Gertraud Koch","orcid":"https://orcid.org/0000-0002-2457-3335"},"institutions":[{"id":"https://openalex.org/I159176309","display_name":"Universit\u00e4t Hamburg","ror":"https://ror.org/00g30e956","country_code":"DE","type":"education","lineage":["https://openalex.org/I159176309"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Gertraud Koch","raw_affiliation_strings":["University of Hamburg, Hamburg, Germany","University of Hamburg (Mittelweg 177, 20148 Hamburg - Germany)"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Hamburg, Hamburg, Germany","institution_ids":["https://openalex.org/I159176309"]},{"raw_affiliation_string":"University of Hamburg (Mittelweg 177, 20148 Hamburg - Germany)","institution_ids":["https://openalex.org/I159176309"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001227032","display_name":"Fausto Giunchiglia","orcid":"https://orcid.org/0000-0002-5903-6150"},"institutions":[{"id":"https://openalex.org/I193223587","display_name":"University of Trento","ror":"https://ror.org/05trd4x28","country_code":"IT","type":"education","lineage":["https://openalex.org/I193223587"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Fausto Giunchiglia","raw_affiliation_strings":["University of Trento, Trento, Italy","UNITN - Universit\u00e0 degli Studi di Trento =  University of Trento (via Calepina, 14 - I-38122 Trento - Italy)"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Trento, Trento, Italy","institution_ids":["https://openalex.org/I193223587"]},{"raw_affiliation_string":"UNITN - Universit\u00e0 degli Studi di Trento =  University of Trento (via Calepina, 14 - I-38122 Trento - Italy)","institution_ids":["https://openalex.org/I193223587"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5057959142"],"corresponding_institution_ids":["https://openalex.org/I887064364"],"apc_list":{"value":2290,"currency":"EUR","value_usd":2890},"apc_paid":{"value":2290,"currency":"EUR","value_usd":2890},"fwci":19.2058,"has_fulltext":true,"cited_by_count":60,"citation_normalized_percentile":{"value":0.99480996,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":"26","issue":"1","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.9879999756813049,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.9879999756813049,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.974399983882904,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9657999873161316,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6116225719451904},{"id":"https://openalex.org/keywords/injustice","display_name":"Injustice","score":0.5303595662117004},{"id":"https://openalex.org/keywords/diversity","display_name":"Diversity (politics)","score":0.5283784866333008},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.4890211522579193},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4583224356174469},{"id":"https://openalex.org/keywords/sociology","display_name":"Sociology","score":0.43900078535079956},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3250797390937805},{"id":"https://openalex.org/keywords/politics","display_name":"Politics","score":0.31071728467941284},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.19243884086608887},{"id":"https://openalex.org/keywords/political-science","display_name":"Political science","score":0.158937007188797},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.12433171272277832}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6116225719451904},{"id":"https://openalex.org/C2777266375","wikidata":"https://www.wikidata.org/wiki/Q770480","display_name":"Injustice","level":2,"score":0.5303595662117004},{"id":"https://openalex.org/C2781316041","wikidata":"https://www.wikidata.org/wiki/Q1230584","display_name":"Diversity (politics)","level":2,"score":0.5283784866333008},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.4890211522579193},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4583224356174469},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.43900078535079956},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3250797390937805},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.31071728467941284},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.19243884086608887},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.158937007188797},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.12433171272277832},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1007/s10676-023-09742-6","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10676-023-09742-6","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10676-023-09742-6.pdf","source":{"id":"https://openalex.org/S13096939","display_name":"Ethics and Information Technology","issn_l":"1388-1957","issn":["1388-1957","1572-8439"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Ethics and Information Technology","raw_type":"journal-article"},{"id":"pmh:oai:dare.uva.nl:openaire_cris_publications/d1e04b0e-7e2a-4d34-9163-3e6aa4d33d74","is_oa":true,"landing_page_url":"https://handle.uba.uva.nl/personal/pure/en/publications/diversity-and-language-technology(d1e04b0e-7e2a-4d34-9163-3e6aa4d33d74).html","pdf_url":null,"source":{"id":"https://openalex.org/S4306400088","display_name":"UvA-DARE (University of Amsterdam)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I887064364","host_organization_name":"University of Amsterdam","host_organization_lineage":["https://openalex.org/I887064364"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Helm, P, Bella, G, Koch, G & Giunchiglia, F 2024, 'Diversity and language technology: how language modeling bias causes epistemic injustice', Ethics and Information Technology, vol. 26, 8. https://doi.org/10.1007/s10676-023-09742-6","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:HAL:hal-04421595v1","is_oa":true,"landing_page_url":"https://hal.science/hal-04421595","pdf_url":"https://hal.science/hal-04421595/document","source":{"id":"https://openalex.org/S4406922466","display_name":"SPIRE - Sciences Po Institutional REpository","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Ethics and Information Technology, 2024, 26 (1), pp.8. &#x27E8;10.1007/s10676-023-09742-6&#x27E9;","raw_type":"Journal articles"}],"best_oa_location":{"id":"doi:10.1007/s10676-023-09742-6","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10676-023-09742-6","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10676-023-09742-6.pdf","source":{"id":"https://openalex.org/S13096939","display_name":"Ethics and Information Technology","issn_l":"1388-1957","issn":["1388-1957","1572-8439"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Ethics and Information Technology","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.44999998807907104,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4391282676.pdf"},"referenced_works_count":66,"referenced_works":["https://openalex.org/W593705254","https://openalex.org/W1509982784","https://openalex.org/W1555354714","https://openalex.org/W1964045210","https://openalex.org/W2006447892","https://openalex.org/W2007619812","https://openalex.org/W2029150930","https://openalex.org/W2034609093","https://openalex.org/W2036333904","https://openalex.org/W2038721957","https://openalex.org/W2130645129","https://openalex.org/W2139419483","https://openalex.org/W2212352435","https://openalex.org/W2318516124","https://openalex.org/W2504995794","https://openalex.org/W2507975203","https://openalex.org/W2620652927","https://openalex.org/W2728235978","https://openalex.org/W2740960106","https://openalex.org/W2791285506","https://openalex.org/W2909212904","https://openalex.org/W2962784628","https://openalex.org/W2989361680","https://openalex.org/W2996844929","https://openalex.org/W2997585375","https://openalex.org/W3035032094","https://openalex.org/W3037831233","https://openalex.org/W3045025511","https://openalex.org/W3103585759","https://openalex.org/W3112849432","https://openalex.org/W3133702157","https://openalex.org/W3146083582","https://openalex.org/W3155618984","https://openalex.org/W3160317075","https://openalex.org/W3167873515","https://openalex.org/W3172917028","https://openalex.org/W3173660000","https://openalex.org/W3196248941","https://openalex.org/W3216577334","https://openalex.org/W4205756177","https://openalex.org/W4223485168","https://openalex.org/W4242071405","https://openalex.org/W4244736247","https://openalex.org/W4283269670","https://openalex.org/W4285123703","https://openalex.org/W4285251426","https://openalex.org/W4285273714","https://openalex.org/W4285275721","https://openalex.org/W4285664076","https://openalex.org/W4288096731","https://openalex.org/W4298286109","https://openalex.org/W4323051415","https://openalex.org/W4323851652","https://openalex.org/W4385571864","https://openalex.org/W4388265233","https://openalex.org/W4388832261","https://openalex.org/W6621507322","https://openalex.org/W6632595698","https://openalex.org/W6632884790","https://openalex.org/W6638208828","https://openalex.org/W6650455210","https://openalex.org/W6685225694","https://openalex.org/W6697477984","https://openalex.org/W6778661971","https://openalex.org/W6794977174","https://openalex.org/W7023606580"],"related_works":["https://openalex.org/W650520959","https://openalex.org/W4210480338","https://openalex.org/W4387994202","https://openalex.org/W1522275965","https://openalex.org/W3204462233","https://openalex.org/W2581779613","https://openalex.org/W2052415453","https://openalex.org/W2898153616","https://openalex.org/W2318801550","https://openalex.org/W2616641213"],"abstract_inverted_index":{"Abstract":[0],"It":[1],"is":[2,88,240],"well":[3],"known":[4],"that":[5,69,114,148,180,186,239],"AI-based":[6],"language":[7,9,44,52,82,98,115,230],"technology\u2014large":[8],"models,":[10],"machine":[11],"translation":[12],"systems,":[13],"multilingual":[14],"dictionaries,":[15],"and":[16,30,91,128,182,204],"corpora\u2014is":[17],"currently":[18],"limited":[19,134],"to":[20,40,54,65,71,110,163,175,199,212,222,242],"three":[21],"percent":[22],"of":[23,50,61,94,130,138,142,152,159,168,194,218,225,228,245],"the":[24,42,48,136,150,176,184,192,200,226,246],"world\u2019s":[25],"most":[26],"widely":[27],"spoken,":[28],"financially":[29],"politically":[31],"backed":[32],"languages.":[33],"In":[34],"response,":[35],"recent":[36],"efforts":[37,63],"have":[38],"sought":[39],"address":[41],"\u201cdigital":[43],"divide\u201d":[45],"by":[46,100],"extending":[47],"reach":[49],"large":[51],"models":[53],"\u201cunderserved":[55],"languages.\u201d":[56],"We":[57,112,145],"show":[58,113,205],"how":[59,206],"some":[60,244],"these":[62],"tend":[64],"produce":[66],"flawed":[67],"solutions":[68],"adhere":[70],"a":[72,89,156,165,213],"hard-wired":[73],"representational":[74],"preference":[75],"for":[76,215],"certain":[77,103],"languages,":[78,104,181],"which":[79,170],"we":[80,197,233],"call":[81],"modeling":[83,86,116],"bias.":[84],"Language":[85],"bias":[87,96,117],"specific":[90],"under-studied":[92],"form":[93],"linguistic":[95],"were":[97],"technology":[99,160],"design":[101],"favors":[102],"dialects,":[105],"or":[106],"sociolects":[107],"with":[108],"respect":[109],"others.":[111],"can":[118,208],"result":[119],"in":[120,135],"systems":[121],"that,":[122],"while":[123],"being":[124],"precise":[125],"regarding":[126],"languages":[127],"cultures":[129],"dominant":[131],"powers,":[132],"are":[133],"expression":[137],"socio-culturally":[139],"relevant":[140],"notions":[141],"other":[143],"communities.":[144,231],"further":[146],"argue":[147],"at":[149],"root":[151],"this":[153],"problem":[154],"lies":[155],"systematic":[157],"tendency":[158],"developer":[161],"communities":[162,185],"apply":[164],"simplistic":[166],"understanding":[167],"diversity":[169,219],"does":[171],"not":[172,210],"do":[173],"justice":[174],"more":[177],"profound":[178],"differences":[179],"ultimately":[183],"speak":[187],"them,":[188],"embody.":[189],"Drawing":[190],"on":[191],"concept":[193],"epistemic":[195],"injustice,":[196],"point":[198],"broader":[201],"ethico-political":[202],"implications":[203],"it":[207],"lead":[209],"only":[211],"disregard":[214],"valuable":[216],"aspects":[217],"but":[220],"also":[221],"an":[223,235],"under-representation":[224],"needs":[227],"marginalized":[229],"Finally,":[232],"present":[234],"alternative":[236],"socio-technical":[237],"approach":[238],"designed":[241],"tackle":[243],"analyzed":[247],"problems.":[248]},"counts_by_year":[{"year":2026,"cited_by_count":14},{"year":2025,"cited_by_count":36},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":2}],"updated_date":"2025-12-26T23:08:49.675405","created_date":"2025-10-10T00:00:00"}
