{"id":"https://openalex.org/W4399062612","doi":"https://doi.org/10.48550/arxiv.2405.15760","title":"GPT is Not an Annotator: The Necessity of Human Annotation in Fairness Benchmark Construction","display_name":"GPT is Not an Annotator: The Necessity of Human Annotation in Fairness Benchmark Construction","publication_year":2024,"publication_date":"2024-05-24","ids":{"openalex":"https://openalex.org/W4399062612","doi":"https://doi.org/10.48550/arxiv.2405.15760"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2405.15760","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2405.15760","pdf_url":"https://arxiv.org/pdf/2405.15760","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2405.15760","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5075490667","display_name":"Virginia K. Felkner","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Felkner, Virginia K.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102659630","display_name":"Jennifer A. Thompson","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Thompson, Jennifer A.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5000874697","display_name":"Jonathan May","orcid":"https://orcid.org/0000-0002-5284-477X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"May, Jonathan","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5075490667"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.9943000078201294,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.9943000078201294,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9477999806404114,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.8026928901672363},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.6439012885093689},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4726492464542389},{"id":"https://openalex.org/keywords/agreement","display_name":"Agreement","score":0.42082691192626953},{"id":"https://openalex.org/keywords/business","display_name":"Business","score":0.32579219341278076},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.21965625882148743},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.10180965065956116},{"id":"https://openalex.org/keywords/philosophy","display_name":"Philosophy","score":0.06735783815383911}],"concepts":[{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.8026928901672363},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.6439012885093689},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4726492464542389},{"id":"https://openalex.org/C2776818064","wikidata":"https://www.wikidata.org/wiki/Q829903","display_name":"Agreement","level":2,"score":0.42082691192626953},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.32579219341278076},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.21965625882148743},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.10180965065956116},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.06735783815383911},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2405.15760","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2405.15760","pdf_url":"https://arxiv.org/pdf/2405.15760","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},{"id":"doi:10.48550/arxiv.2405.15760","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2405.15760","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2405.15760","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2405.15760","pdf_url":"https://arxiv.org/pdf/2405.15760","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4399062612.pdf","grobid_xml":"https://content.openalex.org/works/W4399062612.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2361861616","https://openalex.org/W2263699433","https://openalex.org/W2377979023","https://openalex.org/W2218034408","https://openalex.org/W2392921965","https://openalex.org/W2378211422","https://openalex.org/W4361282947","https://openalex.org/W2250493512"],"abstract_inverted_index":{"Social":[0],"biases":[1],"in":[2,15,113,129],"LLMs":[3],"are":[4],"usually":[5],"measured":[6],"via":[7],"bias":[8,67,148],"benchmark":[9,36,68],"datasets.":[10],"Current":[11],"benchmarks":[12],"have":[13],"limitations":[14],"scope,":[16],"grounding,":[17],"quality,":[18],"and":[19,87,94,108,136],"human":[20,127],"effort":[21,44],"required.":[22],"Previous":[23],"work":[24,40,82],"has":[25,101],"shown":[26],"success":[27],"with":[28,47,61],"a":[29,66,84],"community-sourced,":[30],"rather":[31],"than":[32],"crowd-sourced,":[33],"approach":[34],"to":[35,72,83,133],"development.":[37],"However,":[38],"this":[39,105],"still":[41],"required":[42],"considerable":[43],"from":[45,70],"annotators":[46],"relevant":[48],"lived":[49],"experience.":[50],"This":[51],"paper":[52],"explores":[53],"whether":[54],"an":[55,73,123],"LLM":[56],"(specifically,":[57],"GPT-3.5-Turbo)":[58],"can":[59],"assist":[60],"the":[62,80,91,144],"task":[63,107],"of":[64,89,143,146],"developing":[65],"dataset":[69],"responses":[71],"open-ended":[74],"community":[75,86,93],"survey.":[76],"We":[77],"also":[78],"extend":[79],"previous":[81],"new":[85],"set":[88],"biases:":[90],"Jewish":[92],"antisemitism.":[95],"Our":[96],"analysis":[97],"shows":[98],"that":[99,119,137],"GPT-3.5-Turbo":[100,120],"poor":[102],"performance":[103],"on":[104],"annotation":[106,128],"produces":[109],"unacceptable":[110],"quality":[111],"issues":[112],"its":[114,138],"output.":[115],"Thus,":[116],"we":[117],"conclude":[118],"is":[121],"not":[122],"appropriate":[124],"substitute":[125],"for":[126],"sensitive":[130],"tasks":[131],"related":[132],"social":[134],"biases,":[135],"use":[139],"actually":[140],"negates":[141],"many":[142],"benefits":[145],"community-sourcing":[147],"benchmarks.":[149]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
