{"id":"https://openalex.org/W7160877335","doi":"https://doi.org/10.48550/arxiv.2605.06940","title":"MultiSoc-4D: A Benchmark for Diagnosing Instruction-Induced Label Collapse in Closed-Set LLM Annotation of Bengali Social Media","display_name":"MultiSoc-4D: A Benchmark for Diagnosing Instruction-Induced Label Collapse in Closed-Set LLM Annotation of Bengali Social Media","publication_year":2026,"publication_date":"2026-05-07","ids":{"openalex":"https://openalex.org/W7160877335","doi":"https://doi.org/10.48550/arxiv.2605.06940"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.06940","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.06940","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.06940","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016709357","display_name":"Souvik Pramanik","orcid":"https://orcid.org/0000-0002-5778-4534"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pramanik, Souvik","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124171617","display_name":"S.M. Riaz Rahman Antu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Antu, S. M. Riaz Rahman","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124294770","display_name":"Shak Mohammad Abyad","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Abyad, Shak Mohammad","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135842695","display_name":"Md. Ibrahim Khalil","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Khalil, Md. Ibrahim","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5108260951","display_name":"Md. Shahriar Hussain","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hussain, Md. Shahriar","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.22689999639987946,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.22689999639987946,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.13330000638961792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.10909999907016754,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.8784000277519226},{"id":"https://openalex.org/keywords/bengali","display_name":"Bengali","score":0.8235999941825867},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7369999885559082},{"id":"https://openalex.org/keywords/social-media","display_name":"Social media","score":0.6489999890327454},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5927000045776367},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.527400016784668},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.49000000953674316},{"id":"https://openalex.org/keywords/sarcasm","display_name":"Sarcasm","score":0.43149998784065247}],"concepts":[{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.8784000277519226},{"id":"https://openalex.org/C19235068","wikidata":"https://www.wikidata.org/wiki/Q9610","display_name":"Bengali","level":2,"score":0.8235999941825867},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7369999885559082},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6942999958992004},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6898000240325928},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6780999898910522},{"id":"https://openalex.org/C518677369","wikidata":"https://www.wikidata.org/wiki/Q202833","display_name":"Social media","level":2,"score":0.6489999890327454},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5927000045776367},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.527400016784668},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.49000000953674316},{"id":"https://openalex.org/C2776207355","wikidata":"https://www.wikidata.org/wiki/Q191035","display_name":"Sarcasm","level":3,"score":0.43149998784065247},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.390500009059906},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3801000118255615},{"id":"https://openalex.org/C62230096","wikidata":"https://www.wikidata.org/wiki/Q275969","display_name":"Crowdsourcing","level":2,"score":0.3652999997138977},{"id":"https://openalex.org/C2780922921","wikidata":"https://www.wikidata.org/wiki/Q255189","display_name":"Paraphrase","level":2,"score":0.34929999709129333},{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.3452000021934509},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.30320000648498535},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.29919999837875366},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2761000096797943},{"id":"https://openalex.org/C36382193","wikidata":"https://www.wikidata.org/wiki/Q7068966","display_name":"Null model","level":2,"score":0.27090001106262207},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.26899999380111694},{"id":"https://openalex.org/C2777402240","wikidata":"https://www.wikidata.org/wiki/Q6783436","display_name":"Masking (illustration)","level":2,"score":0.25839999318122864},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.25200000405311584},{"id":"https://openalex.org/C169903167","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Test set","level":2,"score":0.2500999867916107}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.06940","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.06940","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.06940","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.06940","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Annotation":[0],"automation":[1],"via":[2,154],"Large":[3],"Language":[4],"Models":[5],"(LLMs)":[6],"is":[7],"the":[8,174],"core":[9],"approach":[10],"for":[11,188],"scaling":[12],"NLP":[13],"datasets;":[14],"however,":[15],"LLM":[16,84],"behavior":[17,85],"with":[18,132],"respect":[19],"to":[20,109,125,138],"closed-set":[21],"instructions":[22],"in":[23,191],"low-resource":[24],"languages":[25],"has":[26],"not":[27],"been":[28],"well":[29],"studied.":[30],"We":[31,87,181],"present":[32],"MultiSoc-4D,":[33],"a":[34,61,76,89,99,139,148,185],"Bengali":[35,192],"social":[36,43],"media":[37,44],"dataset":[38],"benchmark,":[39],"which":[40],"contains":[41],"58K+":[42],"comments":[45],"from":[46],"six":[47],"sources":[48],"annotated":[49],"along":[50],"four":[51],"dimensions:":[52],"category,":[53],"sentiment,":[54],"hate":[55],"speech,":[56],"and":[57,68,128,134],"sarcasm.":[58],"By":[59],"employing":[60],"structured":[62],"pipeline":[63],"where":[64],"ChatGPT,":[65],"Gemini,":[66],"Claude,":[67],"Grok":[69],"individually":[70],"annotate":[71],"separate":[72],"partitions,":[73],"while":[74],"sharing":[75],"common":[77],"validation":[78],"set":[79],"of":[80,115,130,178],"20%,":[81],"we":[82,120,143,167],"diagnose":[83],"systematically.":[86],"discover":[88],"prevalent":[90],"phenomenon":[91],"called":[92],"\"instruction-induced":[93],"label":[94],"collapse\",":[95],"wherein":[96],"LLMs":[97,123],"show":[98],"systematic":[100],"preference":[101],"towards":[102],"fallback":[103],"labels":[104],"(Other,":[105],"Neutral,":[106],"No),":[107],"leading":[108],"high":[110],"agreement":[111,150],"rates":[112],"but":[113],"under-detection":[114],"minority":[116],"categories.":[117],"For":[118],"example,":[119],"find":[121],"that":[122,145],"failed":[124],"detect":[126],"79%":[127],"75%":[129],"instances":[131],"hateful":[133],"sarcastic":[135],"content":[136],"compared":[137],"human-calibrated":[140],"reference.":[141],"Furthermore,":[142],"prove":[144],"it":[146],"represents":[147],"\"label":[149],"illusion\",":[151],"statistically":[152],"validated":[153],"almost":[155],"null":[156],"Fleiss'":[157],"Kappa":[158],"($\u03ba\\approx":[159],"-0.001$)":[160],"on":[161],"sarcasm":[162],"detection.":[163],"Across":[164],"40+":[165],"LLMs,":[166],"benchmark":[168,187],"this":[169],"annotation":[170,189],"bias":[171],"propagation":[172],"within":[173],"training":[175],"pipeline,":[176],"regardless":[177],"architectural":[179],"differences.":[180],"release":[182],"MultiSoc-4D":[183],"as":[184],"diagnostic":[186],"biases":[190],"NLP.":[193]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-12T00:00:00"}
