{"id":"https://openalex.org/W7161157539","doi":"https://doi.org/10.48550/arxiv.2605.12510","title":"WhatsApp Vaccine Discourse (WhaVax): An Expert-Annotated Dataset and Benchmark for Health Misinformation Detection","display_name":"WhatsApp Vaccine Discourse (WhaVax): An Expert-Annotated Dataset and Benchmark for Health Misinformation Detection","publication_year":2026,"publication_date":"2026-03-25","ids":{"openalex":"https://openalex.org/W7161157539","doi":"https://doi.org/10.48550/arxiv.2605.12510"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.12510","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.12510","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.12510","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136147087","display_name":"J\u00f4natas H. dos Santos","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Santos, J\u00f4natas H. dos","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136118079","display_name":"Julio C. S. Reis","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Reis, Julio C. S.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019655566","display_name":"Philipe Melo","orcid":"https://orcid.org/0000-0001-9830-1896"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Melo, Philipe","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122754429","display_name":"Jo\u00e3o F. H. Olivetti","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Olivetti, Jo\u00e3o F. H.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059356819","display_name":"Thales H. Silva","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Silva, Thales H.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136104804","display_name":"Matheus Gontijo Guimaraes","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guimaraes, Matheus Gontijo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058476254","display_name":"Glaucio de Souza","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"de Souza, Glaucio","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136151551","display_name":"Marcos A. Gon\u00e7alves","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gon\u00e7alves, Marcos A.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085054435","display_name":"Fabr\u00ed\u00adcio Benevenuto","orcid":"https://orcid.org/0000-0001-6875-6259"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Benevenuto, Fabricio","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5095862819","display_name":"Filipe B.B. Zanovello","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zanovello, Filipe B. B.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120668354","display_name":"Marco A. G. Rodrigues","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rodrigues, Marco A. G.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136127979","display_name":"Cristiano X. Lima","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lima, Cristiano X.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":12,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11147","display_name":"Misinformation and Its Impacts","score":0.9509000182151794,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11147","display_name":"Misinformation and Its Impacts","score":0.9509000182151794,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11819","display_name":"Data-Driven Disease Surveillance","score":0.011599999852478504,"subfield":{"id":"https://openalex.org/subfields/2713","display_name":"Epidemiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10833","display_name":"Vaccine Coverage and Hesitancy","score":0.0035000001080334187,"subfield":{"id":"https://openalex.org/subfields/3306","display_name":"Health"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6571000218391418},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.614300012588501},{"id":"https://openalex.org/keywords/misinformation","display_name":"Misinformation","score":0.6011000275611877},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4993000030517578},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.4846999943256378},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.3912999927997589},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.38839998841285706},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.3813999891281128}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7351999878883362},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6571000218391418},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.614300012588501},{"id":"https://openalex.org/C2776990098","wikidata":"https://www.wikidata.org/wiki/Q13579947","display_name":"Misinformation","level":2,"score":0.6011000275611877},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4993000030517578},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.4846999943256378},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.40639999508857727},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.3912999927997589},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.38839998841285706},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.3813999891281128},{"id":"https://openalex.org/C2780385302","wikidata":"https://www.wikidata.org/wiki/Q367158","display_name":"Protocol (science)","level":3,"score":0.3637999892234802},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3594000041484833},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35600000619888306},{"id":"https://openalex.org/C148730421","wikidata":"https://www.wikidata.org/wiki/Q141090","display_name":"Encryption","level":2,"score":0.3434000015258789},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3409000039100647},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3294000029563904},{"id":"https://openalex.org/C2778080475","wikidata":"https://www.wikidata.org/wiki/Q4353774","display_name":"Health communication","level":2,"score":0.319599986076355},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.29120001196861267},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.2883000075817108},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.2824999988079071},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.27489998936653137},{"id":"https://openalex.org/C512654426","wikidata":"https://www.wikidata.org/wiki/Q19652","display_name":"Public domain","level":2,"score":0.2671999931335449},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2669000029563904},{"id":"https://openalex.org/C138816342","wikidata":"https://www.wikidata.org/wiki/Q189603","display_name":"Public health","level":2,"score":0.2621000111103058},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.25769999623298645},{"id":"https://openalex.org/C518677369","wikidata":"https://www.wikidata.org/wiki/Q202833","display_name":"Social media","level":2,"score":0.2529999911785126},{"id":"https://openalex.org/C142210648","wikidata":"https://www.wikidata.org/wiki/Q1266546","display_name":"Record linkage","level":3,"score":0.2502000033855438}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.12510","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.12510","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.12510","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.12510","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/3","display_name":"Good health and well-being","score":0.5274975895881653}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,106],"introduce":[1],"WhaVax,":[2],"a":[3,26,43,54,72,90,147],"new":[4],"expert-annotated":[5],"dataset":[6,22],"of":[7,75,93,100],"vaccine-related":[8],"WhatsApp":[9,76],"messages":[10],"collected":[11],"from":[12],"large":[13],"Brazilian":[14],"public":[15],"groups":[16],"spanning":[17],"multiple":[18],"pandemic":[19],"years.":[20],"The":[21],"was":[23],"constructed":[24],"through":[25],"rigorous,":[27],"carefully":[28],"designed":[29],"pipeline":[30],"that":[31,96,127],"integrates":[32],"keyword-based":[33],"data":[34,139],"collection,":[35],"semantic":[36],"deduplication":[37],"to":[38,151],"remove":[39],"near-duplicate":[40],"content,":[41],"and":[42,63,84,115,130,138,155],"multi-stage":[44],"annotation":[45],"protocol":[46],"conducted":[47],"by":[48,59],"medical":[49],"specialists.":[50],"This":[51,144],"process":[52],"produced":[53],"high-quality":[55,149],"gold-standard":[56],"corpus,":[57],"characterized":[58],"substantial":[60],"inter-annotator":[61],"agreement":[62],"strong":[64,128],"reliability":[65],"for":[66],"downstream":[67],"analysis.":[68],"Additionally,":[69],"we":[70],"provide":[71],"detailed":[73],"characterization":[74],"misinformation,":[77],"revealing":[78],"distinctive":[79],"linguistic,":[80],"structural,":[81],"lexical,":[82],"temporal,":[83],"group-level":[85],"patterns,":[86],"as":[87,89],"well":[88],"meaningful":[91],"layer":[92],"ambiguous":[94],"cases":[95],"reflect":[97],"the":[98],"complexity":[99],"health":[101],"discourse":[102],"in":[103,158],"private":[104],"messaging.":[105],"also":[107],"benchmark":[108],"classical":[109],"models,":[110],"fine-tuned":[111],"Small":[112],"Language":[113,120],"Models,":[114],"zero-":[116],"or":[117],"few-shot":[118],"Large":[119],"Models":[121],"under":[122],"realistic":[123],"data-scarcity":[124],"constraints,":[125],"demonstrating":[126],"embeddings":[129],"LLM":[131],"approaches":[132],"perform":[133],"competitively,":[134],"while":[135],"domain":[136],"alignment":[137],"availability":[140],"remain":[141],"critical":[142],"factors.":[143],"study":[145],"provides":[146],"rare,":[148],"resource":[150],"support":[152],"misinformation":[153],"research":[154],"computational":[156],"modeling":[157],"encrypted":[159],"communication":[160],"environments.":[161]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-15T00:00:00"}
