{"id":"https://openalex.org/W3210907873","doi":"https://doi.org/10.1145/3459637.3482062","title":"Automatic Error Correction Using the Wikipedia Page Revision History","display_name":"Automatic Error Correction Using the Wikipedia Page Revision History","publication_year":2021,"publication_date":"2021-10-26","ids":{"openalex":"https://openalex.org/W3210907873","doi":"https://doi.org/10.1145/3459637.3482062","mag":"3210907873"},"language":"en","primary_location":{"id":"doi:10.1145/3459637.3482062","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3459637.3482062","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM International Conference on Information &amp; Knowledge Management","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100656463","display_name":"Md. Kamrul Hasan","orcid":"https://orcid.org/0000-0003-1292-4350"},"institutions":[{"id":"https://openalex.org/I4577782","display_name":"Technische Universit\u00e4t Berlin","ror":"https://ror.org/03v4gjf40","country_code":"DE","type":"education","lineage":["https://openalex.org/I4577782"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Md Kamrul Hasan","raw_affiliation_strings":["Technische Universit\u00e4t Berlin, Berlin, Germany"],"affiliations":[{"raw_affiliation_string":"Technische Universit\u00e4t Berlin, Berlin, Germany","institution_ids":["https://openalex.org/I4577782"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100690239","display_name":"Mohammad Mahdavi","orcid":"https://orcid.org/0000-0002-4171-7310"},"institutions":[{"id":"https://openalex.org/I2800638415","display_name":"University of Applied Sciences Potsdam","ror":"https://ror.org/012m9bp23","country_code":"DE","type":"education","lineage":["https://openalex.org/I2800638415"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Mohammad Mahdavi","raw_affiliation_strings":["GISMA University of Applied Sciences, Potsdam, Germany"],"affiliations":[{"raw_affiliation_string":"GISMA University of Applied Sciences, Potsdam, Germany","institution_ids":["https://openalex.org/I2800638415"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5100656463"],"corresponding_institution_ids":["https://openalex.org/I4577782"],"apc_list":null,"apc_paid":null,"fwci":0.1743,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.559126,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"3073","last_page":"3077"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9871000051498413,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9793000221252441,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8390460014343262},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.7901769876480103},{"id":"https://openalex.org/keywords/error-detection-and-correction","display_name":"Error detection and correction","score":0.7267670035362244},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.7238666415214539},{"id":"https://openalex.org/keywords/tuple","display_name":"Tuple","score":0.5439962148666382},{"id":"https://openalex.org/keywords/data-pre-processing","display_name":"Data pre-processing","score":0.5385104417800903},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5023541450500488},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5019645690917969},{"id":"https://openalex.org/keywords/data-set","display_name":"Data set","score":0.46071112155914307},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.44317489862442017},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.35620343685150146},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3132997155189514},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.06716018915176392}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8390460014343262},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7901769876480103},{"id":"https://openalex.org/C103088060","wikidata":"https://www.wikidata.org/wiki/Q1062839","display_name":"Error detection and correction","level":2,"score":0.7267670035362244},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.7238666415214539},{"id":"https://openalex.org/C118930307","wikidata":"https://www.wikidata.org/wiki/Q600590","display_name":"Tuple","level":2,"score":0.5439962148666382},{"id":"https://openalex.org/C10551718","wikidata":"https://www.wikidata.org/wiki/Q5227332","display_name":"Data pre-processing","level":2,"score":0.5385104417800903},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5023541450500488},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5019645690917969},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.46071112155914307},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44317489862442017},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35620343685150146},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3132997155189514},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.06716018915176392},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3459637.3482062","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3459637.3482062","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM International Conference on Information &amp; Knowledge Management","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W102708294","https://openalex.org/W1599547597","https://openalex.org/W1647671624","https://openalex.org/W1975184797","https://openalex.org/W1992479406","https://openalex.org/W2026140005","https://openalex.org/W2046298800","https://openalex.org/W2164187405","https://openalex.org/W2190899134","https://openalex.org/W2548122763","https://openalex.org/W2591700809","https://openalex.org/W2798323405","https://openalex.org/W2929941791","https://openalex.org/W2943955885","https://openalex.org/W2948145720","https://openalex.org/W2950577311","https://openalex.org/W2970651950","https://openalex.org/W2981435606","https://openalex.org/W3082197983","https://openalex.org/W3105977086"],"related_works":["https://openalex.org/W2989490741","https://openalex.org/W2367545121","https://openalex.org/W4248881655","https://openalex.org/W2482165163","https://openalex.org/W3092506759","https://openalex.org/W138569904","https://openalex.org/W3010890513","https://openalex.org/W2390914021","https://openalex.org/W2389417819","https://openalex.org/W2368524271"],"abstract_inverted_index":{"Error":[0],"correction":[1,16,28,65,85,123],"is":[2],"one":[3],"of":[4,11,72,80,124,139,142],"the":[5,89,100,114,121],"most":[6],"crucial":[7],"and":[8],"time-consuming":[9],"steps":[10],"data":[12,24,55,70,126,140],"preprocessing.":[13],"State-of-the-art":[14],"error":[15,64,81,116],"systems":[17],"leverage":[18],"various":[19,143],"signals,":[20],"such":[21],"as":[22],"predefined":[23],"constraints":[25],"or":[26],"user-provided":[27],"examples,":[29],"to":[30,44,53,67,119],"fix":[31,54,69],"erroneous":[32],"values":[33],"in":[34],"a":[35,45,62,78,136],"semi-supervised":[36],"manner.":[37],"While":[38],"these":[39,97],"approaches":[40],"reduce":[41],"human":[42],"involvement":[43],"few":[46],"labeled":[47],"tuples,":[48],"they":[49],"still":[50],"need":[51],"supervision":[52],"errors.":[56],"In":[57],"this":[58],"paper,":[59],"we":[60],"propose":[61],"novel":[63],"approach":[66,76,112,133],"automatically":[68,134],"errors":[71,141],"dirty":[73,101,144],"datasets.":[74],"Our":[75],"pretrains":[77],"set":[79],"corrector":[82,117],"models":[83,98,118],"on":[84,99],"examples":[86],"extracted":[87],"from":[88],"Wikipedia":[90],"page":[91],"revision":[92],"history.":[93],"It":[94],"then":[95],"fine-tunes":[96],"dataset":[102],"at":[103],"hand":[104],"without":[105],"any":[106],"required":[107],"user":[108],"labels.":[109],"Finally,":[110],"our":[111,129,132],"aggregates":[113],"fine-tuned":[115],"find":[120],"actual":[122],"each":[125],"error.":[127],"As":[128],"experiments":[130],"show,":[131],"fixes":[135],"large":[137],"portion":[138],"datasets":[145],"with":[146],"high":[147],"precision.":[148]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
