{"id":"https://openalex.org/W4393737873","doi":"https://doi.org/10.5281/zenodo.8174336","title":"Wikipedia Multilingual Vandalism Detection Dataset","display_name":"Wikipedia Multilingual Vandalism Detection Dataset","publication_year":2023,"publication_date":"2023-07-22","ids":{"openalex":"https://openalex.org/W4393737873","doi":"https://doi.org/10.5281/zenodo.8174336"},"language":"en","primary_location":{"id":"pmh:oai:zenodo.org:8174336","is_oa":true,"landing_page_url":"https://zenodo.org/record/8174336","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},"type":"dataset","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://zenodo.org/record/8174336","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5005178641","display_name":"Mykola Trokhymovych","orcid":"https://orcid.org/0000-0001-9932-7094"},"institutions":[{"id":"https://openalex.org/I170486558","display_name":"Universitat Pompeu Fabra","ror":"https://ror.org/04n0g0b29","country_code":"ES","type":"education","lineage":["https://openalex.org/I170486558"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Mykola Trokhymovych","raw_affiliation_strings":["Pompeu Fabra University"],"raw_orcid":"https://orcid.org/0000-0001-9932-7094","affiliations":[{"raw_affiliation_string":"Pompeu Fabra University","institution_ids":["https://openalex.org/I170486558"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101496219","display_name":"Muniza Aslam","orcid":null},"institutions":[{"id":"https://openalex.org/I4210126245","display_name":"Wikimedia Foundation","ror":"https://ror.org/032q98j12","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210126245"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Muniza Aslam","raw_affiliation_strings":["Wikimedia Foundation"],"raw_orcid":"https://orcid.org/0009-0005-9171-060X","affiliations":[{"raw_affiliation_string":"Wikimedia Foundation","institution_ids":["https://openalex.org/I4210126245"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018298207","display_name":"Ai-Jou Chou","orcid":"https://orcid.org/0000-0002-5269-7167"},"institutions":[{"id":"https://openalex.org/I4210126245","display_name":"Wikimedia Foundation","ror":"https://ror.org/032q98j12","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210126245"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ai-Jou Chou","raw_affiliation_strings":["Wikimedia Foundation"],"raw_orcid":"https://orcid.org/0000-0002-5269-7167","affiliations":[{"raw_affiliation_string":"Wikimedia Foundation","institution_ids":["https://openalex.org/I4210126245"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076204770","display_name":"Ricardo Baeza\u2010Yates","orcid":"https://orcid.org/0000-0003-3208-9778"},"institutions":[{"id":"https://openalex.org/I87182695","display_name":"Universidad del Noreste","ror":"https://ror.org/02ahky613","country_code":"MX","type":"education","lineage":["https://openalex.org/I87182695"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Ricardo Baeza-Yates","raw_affiliation_strings":["EAI, Northeastern University"],"raw_orcid":"https://orcid.org/0000-0003-3208-9778","affiliations":[{"raw_affiliation_string":"EAI, Northeastern University","institution_ids":["https://openalex.org/I87182695"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042104049","display_name":"Diego S\u00e1ez-Trumper","orcid":"https://orcid.org/0000-0002-7679-5423"},"institutions":[{"id":"https://openalex.org/I4210126245","display_name":"Wikimedia Foundation","ror":"https://ror.org/032q98j12","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210126245"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Diego Saez-Trumper","raw_affiliation_strings":["Wikimedia Foundation"],"raw_orcid":"https://orcid.org/0000-0002-7679-5423","affiliations":[{"raw_affiliation_string":"Wikimedia Foundation","institution_ids":["https://openalex.org/I4210126245"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5005178641"],"corresponding_institution_ids":["https://openalex.org/I170486558"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12478","display_name":"Wikis in Education and Collaboration","score":0.9684000015258789,"subfield":{"id":"https://openalex.org/subfields/3315","display_name":"Communication"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12478","display_name":"Wikis in Education and Collaboration","score":0.9684000015258789,"subfield":{"id":"https://openalex.org/subfields/3315","display_name":"Communication"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9377999901771545,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5775735378265381},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.38407641649246216},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.36827725172042847},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3441659212112427}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5775735378265381},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.38407641649246216},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.36827725172042847},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3441659212112427}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:zenodo.org:8174336","is_oa":true,"landing_page_url":"https://zenodo.org/record/8174336","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},{"id":"doi:10.5281/zenodo.8174336","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.8174336","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"dataset"}],"best_oa_location":{"id":"pmh:oai:zenodo.org:8174336","is_oa":true,"landing_page_url":"https://zenodo.org/record/8174336","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2382290278","https://openalex.org/W2478288626","https://openalex.org/W4391913857","https://openalex.org/W2350741829","https://openalex.org/W2530322880"],"abstract_inverted_index":{"This":[0],"dataset":[1,24,45,74,111,268,289],"accompanies":[2],"a":[3,8,58,243,266,317,332],"research":[4,50,314],"paper":[5,186,241],"that":[6,343],"introduces":[7],"novel":[9,244],"system":[10,248,297],"designed":[11],"to":[12,28,47,75,86,160,285,331],"support":[13],"the":[14,21,30,65,126,130,162,170,247,252,259,287,296,303,321,344,365],"Wikipedia":[15,35,66,131,253,327],"community":[16,254],"in":[17,37,53,255,306,308,316,320],"combating":[18],"vandalism":[19,54,82,119,171,257],"on":[20,258,370],"platform.":[22,260],"The":[23,40,110,293],"has":[25],"been":[26],"prepared":[27],"enhance":[29],"accuracy":[31],"and":[32,51,60,69,77,84,117,143,149,154,165,176,182,193,196,199,202,272,276,373],"efficiency":[33],"of":[34,42,96,129,180,246,269,295,323,335,359,364],"patrolling":[36,328],"multiple":[38],"languages.":[39],"release":[41],"this":[43,73],"comprehensive":[44],"aims":[46],"encourage":[48],"further":[49],"development":[52],"detection":[55,83,120,172],"techniques,":[56,279],"fostering":[57],"safer":[59],"more":[61,329,350],"inclusive":[62],"environment":[63],"for":[64,81,115,167,212,223],"community.":[67],"Researchers":[68],"practitioners":[70],"can":[71],"utilize":[72],"train":[76],"validate":[78],"their":[79],"models":[80],"contribute":[85],"improving":[87],"online":[88],"platforms'":[89],"content":[90],"moderation":[91],"strategies.":[92],"Dataset":[93],"Details:":[94],"Number":[95],"Languages:":[97],"47":[98,270],"Observation":[99],"period:":[100],"6":[101],"months":[102],"training,":[103],"one":[104,304],"week":[105],"hold-out":[106,177],"testing":[107,178],"Use":[108],"Case:":[109],"is":[112],"primarily":[113],"intended":[114],"training":[116,169,288],"evaluating":[118],"systems.":[121],"Features:":[122],"Each":[123],"record":[124],"characterizes":[125],"corresponding":[127,144],"revision":[128,134],"page,":[132],"including":[133,280],"metadata,":[135],"user":[136],"details,":[137],"text":[138],"inserted,":[139],"removed,":[140],"or":[141],"changed,":[142],"MLMs-based":[145],"features.":[146],"Data":[147,374],"Filtering":[148],"Feature":[150],"Engineering:":[151],"Advanced":[152],"filtering":[153,275],"feature":[155,277],"engineering":[156,278],"techniques":[157],"were":[158],"applied":[159,273],"ensure":[161],"dataset's":[163],"quality":[164],"relevance":[166],"effectively":[168],"system.":[173],"Files:":[174],"Training":[175],"datasets":[179],"anonymous":[181],"all":[183],"users.":[184],"Related":[185],"citation:":[187],"@inproceedings{10.1145/3580305.3599823,":[188],"author":[189],"=":[190,206,215,218,221,227,233,236,239,362,377,380,383,389],"{Trokhymovych,":[191],"Mykola":[192],"Aslam,":[194],"Muniza":[195],"Chou,":[197],"Ai-Jou":[198],"Baeza-Yates,":[200],"Ricardo":[201],"Saez-Trumper,":[203],"Diego},":[204],"title":[205],"{Fair":[207],"Multilingual":[208],"Vandalism":[209],"Detection":[210],"System":[211],"Wikipedia},":[213],"year":[214],"{2023},":[216],"isbn":[217],"{9798400701030},":[219],"publisher":[220],"{Association":[222],"Computing":[224],"Machinery},":[225],"address":[226],"{New":[228],"York,":[229],"NY,":[230],"USA},":[231,387],"url":[232],"{https://doi.org/10.1145/3580305.3599823},":[234],"doi":[235],"{10.1145/3580305.3599823},":[237],"abstract":[238],"{This":[240],"presents":[242],"design":[245],"aimed":[249],"at":[250],"supporting":[251],"addressing":[256],"To":[261],"achieve":[262],"this,":[263],"we":[264],"collected":[265],"massive":[267],"languages,":[271],"advanced":[274],"multilingual":[281],"masked":[282],"language":[283],"modeling":[284],"build":[286],"from":[290],"human-generated":[291],"data.":[292],"performance":[294],"was":[298],"evaluated":[299],"through":[300],"comparison":[301],"with":[302],"used":[305],"production":[307],"Wikipedia,":[309],"known":[310],"as":[311],"ORES.":[312],"Our":[313],"results":[315,345],"significant":[318],"increase":[319],"number":[322],"languages":[324],"covered,":[325],"making":[326],"efficient":[330],"wider":[333],"range":[334],"communities.":[336],"Furthermore,":[337],"our":[338],"model":[339],"outperforms":[340],"ORES,":[341],"ensuring":[342],"provided":[346],"are":[347],"not":[348],"only":[349],"accurate":[351],"but":[352],"also":[353],"less":[354],"biased":[355],"against":[356],"certain":[357],"groups":[358],"contributors.},":[360],"booktitle":[361],"{Proceedings":[363],"29th":[366],"ACM":[367],"SIGKDD":[368],"Conference":[369],"Knowledge":[371],"Discovery":[372],"Mining},":[375],"pages":[376],"{4981\u20134990},":[378],"numpages":[379],"{10},":[381],"location":[382],"{Long":[384],"Beach,":[385],"CA,":[386],"series":[388],"{KDD":[390],"'23}":[391],"}":[392]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
