{"id":"https://openalex.org/W4403578773","doi":"https://doi.org/10.48550/arxiv.2410.12874","title":"On Debiasing Text Embeddings Through Context Injection","display_name":"On Debiasing Text Embeddings Through Context Injection","publication_year":2024,"publication_date":"2024-10-14","ids":{"openalex":"https://openalex.org/W4403578773","doi":"https://doi.org/10.48550/arxiv.2410.12874"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2410.12874","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.12874","pdf_url":"https://arxiv.org/pdf/2410.12874","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2410.12874","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114337720","display_name":"Thomas Uriot","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Uriot, Thomas","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5114337720"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9824000000953674,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9824000000953674,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9797999858856201,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9369000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/debiasing","display_name":"Debiasing","score":0.9848779439926147},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6553000211715698},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6040732264518738},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.22195053100585938},{"id":"https://openalex.org/keywords/cognitive-science","display_name":"Cognitive science","score":0.15650230646133423},{"id":"https://openalex.org/keywords/geology","display_name":"Geology","score":0.15647435188293457},{"id":"https://openalex.org/keywords/paleontology","display_name":"Paleontology","score":0.0733366310596466}],"concepts":[{"id":"https://openalex.org/C2779458634","wikidata":"https://www.wikidata.org/wiki/Q24963715","display_name":"Debiasing","level":2,"score":0.9848779439926147},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6553000211715698},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6040732264518738},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.22195053100585938},{"id":"https://openalex.org/C188147891","wikidata":"https://www.wikidata.org/wiki/Q147638","display_name":"Cognitive science","level":1,"score":0.15650230646133423},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.15647435188293457},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0733366310596466}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2410.12874","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.12874","pdf_url":"https://arxiv.org/pdf/2410.12874","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2410.12874","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2410.12874","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2410.12874","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.12874","pdf_url":"https://arxiv.org/pdf/2410.12874","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4403578773.pdf","grobid_xml":"https://content.openalex.org/works/W4403578773.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W4362554880","https://openalex.org/W4281684980","https://openalex.org/W4386875279","https://openalex.org/W2171721708","https://openalex.org/W4390963114","https://openalex.org/W4287887864","https://openalex.org/W3214527415"],"abstract_inverted_index":{"Current":[0],"advances":[1,72],"in":[2,52,73,149,157],"Natural":[3],"Language":[4],"Processing":[5],"(NLP)":[6],"have":[7,58],"made":[8],"it":[9,39],"increasingly":[10],"feasible":[11],"to":[12,61,104,121,161,169,190],"build":[13],"applications":[14,23],"leveraging":[15],"textual":[16],"data.":[17],"Generally,":[18],"the":[19,70],"core":[20],"of":[21,31,69,76,91,110],"these":[22,44],"rely":[24],"on":[25],"having":[26],"a":[27,55,89,108,150,171],"good":[28],"semantic":[29],"representation":[30],"text":[32],"into":[33],"vectors,":[34],"via":[35],"embedding":[36,78,93,145],"models.":[37,79],"However,":[38],"has":[40],"been":[41,59],"shown":[42],"that":[43,114,134,155,185],"embeddings":[45,158],"capture":[46],"and":[47,99,195],"perpetuate":[48],"biases":[49,98,156],"already":[50],"present":[51],"text.":[53],"While":[54],"few":[56],"techniques":[57],"proposed":[60],"debias":[62],"embeddings,":[63],"they":[64,102,142],"do":[65],"not":[66],"take":[67],"advantage":[68],"recent":[71],"context":[74,105],"understanding":[75],"modern":[77],"In":[80],"this":[81,85],"paper,":[82],"we":[83,132,153],"fill":[84],"gap":[86],"by":[87,95],"conducting":[88],"review":[90],"19":[92],"models":[94,117,136],"quantifying":[96],"their":[97],"how":[100],"well":[101],"respond":[103],"injection":[106],"as":[107],"mean":[109],"debiasing.":[111],"We":[112,164,183],"show":[113,154,184],"higher":[115],"performing":[116],"are":[118,125],"more":[119],"prone":[120],"capturing":[122],"biases,":[123],"but":[124],"also":[126],"better":[127],"at":[128,144],"incorporating":[129],"context.":[130],"Surprisingly,":[131],"find":[133],"while":[135],"can":[137,159],"easily":[138],"embed":[139],"affirmative":[140],"semantics,":[141],"fail":[143],"neutral":[146,196],"semantics.":[147],"Finally,":[148],"retrieval":[151],"task,":[152],"lead":[160],"non-desirable":[162],"outcomes.":[163],"use":[165],"our":[166,186],"new-found":[167],"insights":[168],"design":[170],"simple":[172],"algorithm":[173,187],"for":[174],"top":[175],"$k$":[176,179],"retrieval,":[177],"where":[178],"is":[180,188],"dynamically":[181],"selected.":[182],"able":[189],"retrieve":[191],"all":[192],"relevant":[193],"gendered":[194],"chunks.":[197]},"counts_by_year":[],"updated_date":"2026-03-14T08:43:22.919905","created_date":"2025-10-10T00:00:00"}
