{"id":"https://openalex.org/W7127652662","doi":"https://doi.org/10.1109/dese68208.2025.11367955","title":"Enhancing RLHF in LLMs: Comparing BERT, XGBoost, and Deep Models for Bias Detection","display_name":"Enhancing RLHF in LLMs: Comparing BERT, XGBoost, and Deep Models for Bias Detection","publication_year":2025,"publication_date":"2025-11-10","ids":{"openalex":"https://openalex.org/W7127652662","doi":"https://doi.org/10.1109/dese68208.2025.11367955"},"language":null,"primary_location":{"id":"doi:10.1109/dese68208.2025.11367955","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dese68208.2025.11367955","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 18th International Conference on Development in eSystem Engineering (DeSE)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125024322","display_name":"Walid Torfa","orcid":null},"institutions":[{"id":"https://openalex.org/I29891158","display_name":"University of Sharjah","ror":"https://ror.org/00engpz63","country_code":"AE","type":"education","lineage":["https://openalex.org/I29891158"]}],"countries":["AE"],"is_corresponding":true,"raw_author_name":"Walid Torfa","raw_affiliation_strings":["University of Sharjah,Department of Electrical and Electronics Engineering,Sharjah,United Arab Emirates"],"affiliations":[{"raw_affiliation_string":"University of Sharjah,Department of Electrical and Electronics Engineering,Sharjah,United Arab Emirates","institution_ids":["https://openalex.org/I29891158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042516440","display_name":"Omar Elgendy","orcid":null},"institutions":[{"id":"https://openalex.org/I29891158","display_name":"University of Sharjah","ror":"https://ror.org/00engpz63","country_code":"AE","type":"education","lineage":["https://openalex.org/I29891158"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Omar Elgendy","raw_affiliation_strings":["University of Sharjah,Department of Computer Engineering,Sharjah,United Arab Emirates"],"affiliations":[{"raw_affiliation_string":"University of Sharjah,Department of Computer Engineering,Sharjah,United Arab Emirates","institution_ids":["https://openalex.org/I29891158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033276797","display_name":"Ali Nassif","orcid":null},"institutions":[{"id":"https://openalex.org/I29891158","display_name":"University of Sharjah","ror":"https://ror.org/00engpz63","country_code":"AE","type":"education","lineage":["https://openalex.org/I29891158"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Ali Bou Nassif","raw_affiliation_strings":["University of Sharjah,Department of Computer Engineering,Sharjah,United Arab Emirates"],"affiliations":[{"raw_affiliation_string":"University of Sharjah,Department of Computer Engineering,Sharjah,United Arab Emirates","institution_ids":["https://openalex.org/I29891158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5125024322"],"corresponding_institution_ids":["https://openalex.org/I29891158"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.88352901,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"226","last_page":"231"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.17239999771118164,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.17239999771118164,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.0925000011920929,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.07349999994039536,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5491999983787537},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.46140000224113464},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.35589998960494995},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.34880000352859497},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.32409998774528503},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.2957000136375427}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7490000128746033},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6309000253677368},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5648999810218811},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5491999983787537},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.46140000224113464},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.35589998960494995},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.34880000352859497},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.32409998774528503},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2957000136375427},{"id":"https://openalex.org/C45942800","wikidata":"https://www.wikidata.org/wiki/Q245652","display_name":"Ensemble learning","level":2,"score":0.2727000117301941},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.27129998803138733},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.2628999948501587},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2597000002861023},{"id":"https://openalex.org/C133462117","wikidata":"https://www.wikidata.org/wiki/Q4929239","display_name":"Data collection","level":2,"score":0.25459998846054077}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/dese68208.2025.11367955","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dese68208.2025.11367955","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 18th International Conference on Development in eSystem Engineering (DeSE)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Climate action","score":0.638407826423645,"id":"https://metadata.un.org/sdg/13"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W2584117724","https://openalex.org/W2921355398","https://openalex.org/W2962990575","https://openalex.org/W3046292371","https://openalex.org/W3104617516","https://openalex.org/W3128232076","https://openalex.org/W3167443963","https://openalex.org/W3176477796","https://openalex.org/W4225295315","https://openalex.org/W4306845373","https://openalex.org/W4385574259","https://openalex.org/W4388488349","https://openalex.org/W4389978940","https://openalex.org/W4399528455","https://openalex.org/W4403289668"],"related_works":[],"abstract_inverted_index":{"This":[0,47],"study":[1],"explores":[2],"bias":[3,51,125],"detection":[4,134],"in":[5,91],"large":[6],"language":[7],"models":[8,135],"(LLMs)":[9],"to":[10],"support":[11],"Reinforcement":[12],"Learning":[13],"from":[14],"Human":[15],"Feedback":[16],"(RLHF)":[17],"pipelines":[18],"aimed":[19],"at":[20,108],"reducing":[21],"social":[22],"bias.":[23,59],"We":[24],"compare":[25],"three":[26],"classifiers-a":[27],"frozen":[28],"BERT-based":[29,76],"model":[30,77,98],"with":[31,102,111],"a":[32,35],"classification":[33,126],"head,":[34],"custom":[36],"deep":[37,96],"learning":[38,97],"model,":[39],"and":[40,57,84,120,127],"an":[41],"XGBoost":[42,106],"model-on":[43],"the":[44,75,79,129],"StereoSet":[45],"dataset.":[46],"dataset":[48],"includes":[49],"five":[50],"categories:":[52],"gender,":[53],"profession,":[54],"race,":[55],"religion,":[56],"no":[58],"To":[60],"address":[61],"class":[62],"imbalance,":[63],"we":[64],"generated":[65],"synthetic":[66],"samples":[67],"using":[68],"Google's":[69],"gemma-2-9b-it":[70],"model.":[71],"Results":[72],"show":[73],"that":[74,117],"achieved":[78],"highest":[80],"overall":[81],"accuracy":[82],"0.92":[83],"outperformed":[85],"others":[86],"across":[87],"all":[88],"categories,":[89],"particularly":[90],"detecting":[92],"subtle":[93],"biases.":[94],"The":[95],"was":[99],"ranked":[100],"second":[101],"0.85":[103],"accuracy,":[104],"while":[105],"lagged":[107],"0.75,":[109],"struggling":[110],"no-bias":[112],"detection.":[113],"These":[114],"findings":[115],"confirm":[116],"architecture":[118],"choice":[119],"training":[121],"strategy":[122],"significantly":[123],"impact":[124],"reinforce":[128],"importance":[130],"of":[131],"integrating":[132],"reliable":[133],"into":[136],"RLHF":[137],"workflows.":[138]},"counts_by_year":[],"updated_date":"2026-02-20T08:17:22.645390","created_date":"2026-02-06T00:00:00"}
