{"id":"https://openalex.org/W7129306256","doi":"https://doi.org/10.48550/arxiv.2602.14466","title":"Robust Bias Evaluation with FilBBQ: A Filipino Bias Benchmark for Question-Answering Language Models","display_name":"Robust Bias Evaluation with FilBBQ: A Filipino Bias Benchmark for Question-Answering Language Models","publication_year":2026,"publication_date":"2026-02-16","ids":{"openalex":"https://openalex.org/W7129306256","doi":"https://doi.org/10.48550/arxiv.2602.14466"},"language":"en","primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.14466","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5037777763","display_name":"Lance Calvin Lim Gamboa","orcid":"https://orcid.org/0000-0003-0095-1335"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Gamboa, Lance Calvin Lim","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126255021","display_name":"Yue Feng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Feng, Yue","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5126173564","display_name":"Mark Lee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Mark","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5037777763"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.5627999901771545,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.5627999901771545,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.1379999965429306,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.05979999899864197,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7369999885559082},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.5425999760627747},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4941999912261963},{"id":"https://openalex.org/keywords/scope","display_name":"Scope (computer science)","score":0.4505999982357025},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4165000021457672},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.3732999861240387},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.3531999886035919}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7369999885559082},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5508000254631042},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.5425999760627747},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5034000277519226},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4941999912261963},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.4884999990463257},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.48500001430511475},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4641000032424927},{"id":"https://openalex.org/C2778012447","wikidata":"https://www.wikidata.org/wiki/Q1034415","display_name":"Scope (computer science)","level":2,"score":0.4505999982357025},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4165000021457672},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.3732999861240387},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.3531999886035919},{"id":"https://openalex.org/C40423286","wikidata":"https://www.wikidata.org/wiki/Q284172","display_name":"Selection bias","level":2,"score":0.3490000069141388},{"id":"https://openalex.org/C2780385302","wikidata":"https://www.wikidata.org/wiki/Q367158","display_name":"Protocol (science)","level":3,"score":0.34439998865127563},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.34220001101493835},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.3377000093460083},{"id":"https://openalex.org/C2983427547","wikidata":"https://www.wikidata.org/wiki/Q93200","display_name":"Gender bias","level":2,"score":0.3043000102043152},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.2874999940395355},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.2833999991416931},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.2784000039100647},{"id":"https://openalex.org/C165838908","wikidata":"https://www.wikidata.org/wiki/Q736777","display_name":"Calibration","level":2,"score":0.2727000117301941},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.25690001249313354}],"mesh":[],"locations_count":5,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.14466","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"pmh:oai:pure.atira.dk:openaire_cris_publications/c388173e-f437-40e4-8396-67e8fd94c5d7","is_oa":true,"landing_page_url":"https://research.birmingham.ac.uk/en/publications/c388173e-f437-40e4-8396-67e8fd94c5d7","pdf_url":null,"source":{"id":"https://openalex.org/S4306402634","display_name":"University of Birmingham Research Portal (University of Birmingham)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79619799","host_organization_name":"University of Birmingham","host_organization_lineage":["https://openalex.org/I79619799"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Gamboa, L C L, Feng, Y & Lee, M 2026 'Robust Bias Evaluation with FilBBQ : A Filipino Bias Benchmark for Question-Answering Language Models' arXiv. https://doi.org/10.48550/arXiv.2602.14466","raw_type":"workingPaper"},{"id":"pmh:oai:pure.atira.dk:openaire_cris_publications/eacfb2d8-5428-4b36-8c34-2c134d934a6a","is_oa":false,"landing_page_url":"https://research.birmingham.ac.uk/en/publications/eacfb2d8-5428-4b36-8c34-2c134d934a6a","pdf_url":null,"source":{"id":"https://openalex.org/S4306402634","display_name":"University of Birmingham Research Portal (University of Birmingham)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79619799","host_organization_name":"University of Birmingham","host_organization_lineage":["https://openalex.org/I79619799"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Gamboa, L C, Feng, Y & Lee, M 2026, Robust Bias Evaluation with FilBBQ : A Filipino Bias Benchmark for Question-Answering Language Models. in Proceedings of The Fifteenth Language Resources and Evaluation Conference. Association for Computational Linguistics, ACL, Fifteenth Language Resources and Evaluation Conference, Palma, Spain, 13/05/26.","raw_type":"contributionToPeriodical"},{"id":"pmh:oai:pure.atira.dk:publications/eacfb2d8-5428-4b36-8c34-2c134d934a6a","is_oa":true,"landing_page_url":"https://aclanthology.org/venues/lrec/","pdf_url":null,"source":{"id":"https://openalex.org/S4306402634","display_name":"University of Birmingham Research Portal (University of Birmingham)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79619799","host_organization_name":"University of Birmingham","host_organization_lineage":["https://openalex.org/I79619799"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Gamboa, L C, Feng, Y & Lee, M 2026, Robust Bias Evaluation with FilBBQ : A Filipino Bias Benchmark for Question-Answering Language Models. in Proceedings of The Fifteenth Language Resources and Evaluation Conference. Association for Computational Linguistics, ACL, Fifteenth Language Resources and Evaluation Conference, Palma, Spain, 13/05/26.","raw_type":"contributionToPeriodical"},{"id":"doi:10.48550/arxiv.2602.14466","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.14466","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.14466","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/5","score":0.665306031703949,"display_name":"Gender equality"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"With":[0],"natural":[1],"language":[2,10],"generation":[3],"becoming":[4],"a":[5,45,66,102],"popular":[6],"use":[7],"case":[8],"for":[9,15,26,120],"models,":[11],"the":[12,36,86,109,133,146,155],"Bias":[13],"Benchmark":[14],"Question-Answering":[16],"(BBQ)":[17],"has":[18],"grown":[19],"to":[20,85,163],"be":[21],"an":[22],"important":[23],"benchmark":[24],"format":[25],"evaluating":[27],"stereotypical":[28],"associations":[29],"exhibited":[30],"by":[31,124],"generative":[32],"models.":[33],"We":[34,89],"expand":[35],"linguistic":[37],"scope":[38],"of":[39,50,70,113,148,157],"BBQ":[40,115],"and":[41,59,81,111,131,154,159,169],"construct":[42],"FilBBQ":[43,92,171],"through":[44],"four-phase":[46],"development":[47],"process":[48],"consisting":[49],"template":[51,57],"categorization,":[52],"culturally":[53],"aware":[54],"translation,":[55],"new":[56],"construction,":[58],"prompt":[60,126],"generation.":[61],"These":[62],"processes":[63],"resulted":[64],"in":[65,96],"bias":[67,134,149],"test":[68],"composed":[69],"more":[71],"than":[72],"10,000":[73],"prompts":[74],"which":[75],"assess":[76],"whether":[77],"models":[78,94],"demonstrate":[79],"sexist":[80,158],"homophobic":[82,160],"prejudices":[83],"relevant":[84],"Philippine":[87],"context.":[88],"then":[90],"apply":[91],"on":[93],"trained":[95],"Filipino":[97],"but":[98],"do":[99],"so":[100],"with":[101],"robust":[103],"evaluation":[104],"protocol":[105],"that":[106],"improves":[107],"upon":[108],"reliability":[110],"accuracy":[112],"previous":[114],"implementations.":[116],"Specifically,":[117],"we":[118],"account":[119],"models'":[121],"response":[122],"instability":[123],"obtaining":[125],"responses":[127],"across":[128,151],"multiple":[129],"seeds":[130,153],"averaging":[132],"scores":[135,150],"calculated":[136],"from":[137],"these":[138],"distinctly":[139],"seeded":[140],"runs.":[141],"Our":[142],"results":[143],"confirm":[144],"both":[145],"variability":[147],"different":[152],"presence":[156],"biases":[161],"relating":[162],"emotion,":[164],"domesticity,":[165],"stereotyped":[166],"queer":[167],"interests,":[168],"polygamy.":[170],"is":[172],"available":[173],"via":[174],"GitHub.":[175]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-18T00:00:00"}
