{"id":"https://openalex.org/W7160932991","doi":"https://doi.org/10.48550/arxiv.2605.09955","title":"Beyond Majority Voting: Agreement-Based Clustering to Model Annotator Perspectives in Subjective NLP Tasks","display_name":"Beyond Majority Voting: Agreement-Based Clustering to Model Annotator Perspectives in Subjective NLP Tasks","publication_year":2026,"publication_date":"2026-05-11","ids":{"openalex":"https://openalex.org/W7160932991","doi":"https://doi.org/10.48550/arxiv.2605.09955"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.09955","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.09955","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.09955","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5040546523","display_name":"Tadesse Destaw Belay","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Belay, Tadesse Destaw","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135950374","display_name":"Ibrahim Said Ahmad","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ahmad, Ibrahim Said","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135954165","display_name":"Idris Abdulmumin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Abdulmumin, Idris","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135982104","display_name":"Abinew Ali Ayele","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ayele, Abinew Ali","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135915851","display_name":"Alexander Gelbukh","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gelbukh, Alexander","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135911306","display_name":"Eusebio Ric\u00e1rdez-V\u00e1zquez","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ric\u00e1rdez-V\u00e1zquez, Eusebio","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135952034","display_name":"Olga Kolesnikova","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kolesnikova, Olga","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135975685","display_name":"Shamsuddeen Hassan Muhammad","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Muhammad, Shamsuddeen Hassan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5135930805","display_name":"Seid Muhie Yimam","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yimam, Seid Muhie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.5899999737739563,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.5899999737739563,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.1429000049829483,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.04410000145435333,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.7192999720573425},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.6136999726295471},{"id":"https://openalex.org/keywords/majority-rule","display_name":"Majority rule","score":0.5422000288963318},{"id":"https://openalex.org/keywords/voting","display_name":"Voting","score":0.5317000150680542},{"id":"https://openalex.org/keywords/sentiment-analysis","display_name":"Sentiment analysis","score":0.47189998626708984},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.445499986410141}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7872999906539917},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.732699990272522},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7192999720573425},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6460999846458435},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.6136999726295471},{"id":"https://openalex.org/C153668964","wikidata":"https://www.wikidata.org/wiki/Q27636","display_name":"Majority rule","level":2,"score":0.5422000288963318},{"id":"https://openalex.org/C520049643","wikidata":"https://www.wikidata.org/wiki/Q189760","display_name":"Voting","level":3,"score":0.5317000150680542},{"id":"https://openalex.org/C66402592","wikidata":"https://www.wikidata.org/wiki/Q2271421","display_name":"Sentiment analysis","level":2,"score":0.47189998626708984},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.445499986410141},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.37400001287460327},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35679998993873596},{"id":"https://openalex.org/C104054115","wikidata":"https://www.wikidata.org/wiki/Q216828","display_name":"Cohesion (chemistry)","level":2,"score":0.30730000138282776},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3005000054836273},{"id":"https://openalex.org/C2776818064","wikidata":"https://www.wikidata.org/wiki/Q829903","display_name":"Agreement","level":2,"score":0.28630000352859497},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.27230000495910645}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.09955","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.09955","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.09955","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.09955","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.5646061897277832}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Disagreement":[0],"in":[1,7,72,75,123],"annotation":[2],"is":[3,46],"a":[4,16],"common":[5],"phenomenon":[6],"the":[8,25,63,66,112,136,139],"development":[9],"of":[10,19,115],"NLP":[11,53,83,125],"datasets":[12,74],"and":[13,48,89,102,118,131,141,153],"serves":[14],"as":[15],"valuable":[17],"source":[18],"insight.":[20],"While":[21],"majority":[22,98,129,155],"voting":[23,130],"remains":[24,49],"dominant":[26],"strategy":[27],"for":[28,146],"aggregating":[29],"labels,":[30],"recent":[31],"work":[32],"has":[33],"explored":[34],"modeling":[35,43,147],"individual":[36,132],"annotators":[37,149],"to":[38,61,128],"preserve":[39],"their":[40],"perspectives.":[41],"However,":[42],"each":[44],"annotator":[45,116,133],"resource-intensive":[47],"underexplored":[50],"across":[51],"various":[52],"tasks.":[54],"We":[55,68,93],"propose":[56],"an":[57,151],"agreement-based":[58,108],"clustering":[59,109],"technique":[60],"model":[62,154],"disagreement":[64],"between":[65],"annotators.":[67],"conduct":[69],"comprehensive":[70],"experiments":[71],"40":[73],"18":[76],"typologically":[77],"diverse":[78],"languages,":[79],"covering":[80],"three":[81],"subjective":[82,124],"tasks:":[84],"sentiment":[85],"analysis,":[86],"emotion":[87],"classification,":[88],"hate":[90],"speech":[91],"detection.":[92],"evaluate":[94],"four":[95],"aggregation":[96,137],"approaches:":[97],"vote,":[99],"ensemble,":[100],"multi-label,":[101],"multitask.":[103],"The":[104],"results":[105],"demonstrate":[106],"that":[107],"can":[110],"leverage":[111],"full":[113],"spectrum":[114],"perspectives":[117],"significantly":[119],"enhance":[120],"classification":[121],"performance":[122],"tasks":[126],"compared":[127],"modeling.":[134],"Regarding":[135],"approach,":[138],"multi-label":[140],"multitask":[142],"approaches":[143],"are":[144],"better":[145],"clustered":[148],"than":[150],"ensemble":[152],"vote.":[156]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-13T00:00:00"}
