{"id":"https://openalex.org/W2053617566","doi":"https://doi.org/10.1145/2348283.2348413","title":"Predicting quality flaws in user-generated content","display_name":"Predicting quality flaws in user-generated content","publication_year":2012,"publication_date":"2012-08-12","ids":{"openalex":"https://openalex.org/W2053617566","doi":"https://doi.org/10.1145/2348283.2348413","mag":"2053617566"},"language":"en","primary_location":{"id":"doi:10.1145/2348283.2348413","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2348283.2348413","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 35th international ACM SIGIR conference on Research and development in information retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5057525856","display_name":"Maik Anderka","orcid":null},"institutions":[{"id":"https://openalex.org/I51441396","display_name":"Bauhaus-Universit\u00e4t Weimar","ror":"https://ror.org/033bb5z47","country_code":"DE","type":"education","lineage":["https://openalex.org/I51441396"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Maik Anderka","raw_affiliation_strings":["Bauhaus-Universit\u00e4t Weimar, Weimar, Germany"],"affiliations":[{"raw_affiliation_string":"Bauhaus-Universit\u00e4t Weimar, Weimar, Germany","institution_ids":["https://openalex.org/I51441396"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027915931","display_name":"Benno Stein","orcid":"https://orcid.org/0000-0001-9033-2217"},"institutions":[{"id":"https://openalex.org/I51441396","display_name":"Bauhaus-Universit\u00e4t Weimar","ror":"https://ror.org/033bb5z47","country_code":"DE","type":"education","lineage":["https://openalex.org/I51441396"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Benno Stein","raw_affiliation_strings":["Bauhaus-Universit\u00e4t Weimar, Weimar, Germany"],"affiliations":[{"raw_affiliation_string":"Bauhaus-Universit\u00e4t Weimar, Weimar, Germany","institution_ids":["https://openalex.org/I51441396"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5016933602","display_name":"Nedim Lipka","orcid":"https://orcid.org/0000-0002-3779-7784"},"institutions":[{"id":"https://openalex.org/I51441396","display_name":"Bauhaus-Universit\u00e4t Weimar","ror":"https://ror.org/033bb5z47","country_code":"DE","type":"education","lineage":["https://openalex.org/I51441396"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Nedim Lipka","raw_affiliation_strings":["Bauhaus-Universit\u00e4t Weimar, Weimar, Germany"],"affiliations":[{"raw_affiliation_string":"Bauhaus-Universit\u00e4t Weimar, Weimar, Germany","institution_ids":["https://openalex.org/I51441396"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5057525856"],"corresponding_institution_ids":["https://openalex.org/I51441396"],"apc_list":null,"apc_paid":null,"fwci":11.1359,"has_fulltext":false,"cited_by_count":70,"citation_normalized_percentile":{"value":0.98143299,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"981","last_page":"990"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12478","display_name":"Wikis in Education and Collaboration","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/3315","display_name":"Communication"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12478","display_name":"Wikis in Education and Collaboration","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/3315","display_name":"Communication"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9689000248908997,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13157","display_name":"Cancer-related gene regulation","score":0.9114000201225281,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8386105895042419},{"id":"https://openalex.org/keywords/encyclopedia","display_name":"Encyclopedia","score":0.6738216280937195},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.637377142906189},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.554737389087677},{"id":"https://openalex.org/keywords/user-generated-content","display_name":"User-generated content","score":0.4825546145439148},{"id":"https://openalex.org/keywords/binary-classification","display_name":"Binary classification","score":0.4699561595916748},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.4441571831703186},{"id":"https://openalex.org/keywords/web-content","display_name":"Web content","score":0.4216320514678955},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.40747615694999695},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.39837467670440674},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.39831364154815674},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3736443519592285},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.3191533088684082},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.2431710660457611},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.19394493103027344}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8386105895042419},{"id":"https://openalex.org/C148863701","wikidata":"https://www.wikidata.org/wiki/Q5292","display_name":"Encyclopedia","level":2,"score":0.6738216280937195},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.637377142906189},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.554737389087677},{"id":"https://openalex.org/C101293273","wikidata":"https://www.wikidata.org/wiki/Q579716","display_name":"User-generated content","level":3,"score":0.4825546145439148},{"id":"https://openalex.org/C66905080","wikidata":"https://www.wikidata.org/wiki/Q17005494","display_name":"Binary classification","level":3,"score":0.4699561595916748},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.4441571831703186},{"id":"https://openalex.org/C2776324614","wikidata":"https://www.wikidata.org/wiki/Q3948731","display_name":"Web content","level":3,"score":0.4216320514678955},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.40747615694999695},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.39837467670440674},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39831364154815674},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3736443519592285},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.3191533088684082},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.2431710660457611},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.19394493103027344},{"id":"https://openalex.org/C161191863","wikidata":"https://www.wikidata.org/wiki/Q199655","display_name":"Library science","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C518677369","wikidata":"https://www.wikidata.org/wiki/Q202833","display_name":"Social media","level":2,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2348283.2348413","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2348283.2348413","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 35th international ACM SIGIR conference on Research and development in information retrieval","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.4300000071525574}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W9825390","https://openalex.org/W169460412","https://openalex.org/W1488833649","https://openalex.org/W1500690868","https://openalex.org/W1521843029","https://openalex.org/W1539380046","https://openalex.org/W1547720490","https://openalex.org/W1567491469","https://openalex.org/W1602942503","https://openalex.org/W1930624869","https://openalex.org/W1972644898","https://openalex.org/W1986571907","https://openalex.org/W1990220548","https://openalex.org/W1993924397","https://openalex.org/W2011920246","https://openalex.org/W2020867023","https://openalex.org/W2037858832","https://openalex.org/W2038691557","https://openalex.org/W2043224316","https://openalex.org/W2045778260","https://openalex.org/W2048373593","https://openalex.org/W2048395990","https://openalex.org/W2065753378","https://openalex.org/W2072008013","https://openalex.org/W2077513286","https://openalex.org/W2095030488","https://openalex.org/W2098410026","https://openalex.org/W2099580784","https://openalex.org/W2102271161","https://openalex.org/W2104290389","https://openalex.org/W2111122424","https://openalex.org/W2119787124","https://openalex.org/W2129514578","https://openalex.org/W2134510195","https://openalex.org/W2136219771","https://openalex.org/W2138752966","https://openalex.org/W2158342607","https://openalex.org/W2185377712","https://openalex.org/W2911964244","https://openalex.org/W2912934387","https://openalex.org/W4285719527","https://openalex.org/W6738852829"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W3200179079","https://openalex.org/W4249229055","https://openalex.org/W4205288553","https://openalex.org/W3215867059","https://openalex.org/W2556319748","https://openalex.org/W4328092580","https://openalex.org/W2891961174","https://openalex.org/W4322008322","https://openalex.org/W2171095014"],"abstract_inverted_index":{"The":[0,74,237],"detection":[1],"and":[2,109,158,175],"improvement":[3],"of":[4,33,59,137,155,196,206,222],"low-quality":[5,70],"information":[6],"is":[7,23,45,76,106,200,241],"a":[8,20,134,165,171,177,207,220,270],"key":[9],"concern":[10],"in":[11,67,86,190,226],"Web":[12,88],"applications":[13,89],"that":[14,93,143,247],"are":[15,83,150],"based":[16,77],"on":[17,30,78,115],"user-generated":[18,34,112],"content;":[19],"popular":[21,111],"example":[22],"the":[24,38,43,57,102,107,116,126,153,191,194,204,216,223,231,252],"online":[25],"encyclopedia":[26],"Wikipedia.":[27],"Existing":[28],"research":[29],"quality":[31,60,156,161,172,187,256],"assessment":[32],"content":[35,44,71,92],"deals":[36],"with":[37,133,230,243,251,261,269],"classification":[39,148,167],"as":[40,164,219],"to":[41,90,101,124,182,228,273],"whether":[42],"high-quality":[46],"or":[47,146],"low-quality.":[48],"This":[49],"paper":[50],"goes":[51],"one":[52],"step":[53],"further:":[54],"it":[55],"targets":[56],"prediction":[58,75,154,163,239],"flaws,":[61],"this":[62,99,212],"way":[63],"providing":[64],"specific":[65],"indications":[66],"which":[68,82,105,130],"respects":[69],"needs":[72],"improvement.":[73],"user-defined":[79],"cleanup":[80,128],"tags,":[81,129],"commonly":[84],"used":[85],"many":[87],"tag":[91],"has":[94],"some":[95],"shortcomings.":[96],"We":[97,118,141,169],"apply":[98],"approach":[100,123,181],"English":[103],"Wikipedia,":[104],"largest":[108],"most":[110,185,254],"knowledge":[113],"source":[114],"Web.":[117],"present":[119],"an":[120],"automatic":[121],"mining":[122],"identify":[125],"existing":[127],"provides":[131],"us":[132],"training":[135],"corpus":[136],"labeled":[138],"Wikipedia":[139,192,245],"articles.":[140],"argue":[142],"common":[144],"binary":[145],"multiclass":[147],"approaches":[149],"ineffective":[151],"for":[152],"flaws":[157,265],"hence":[159],"cast":[160],"flaw":[162,173,224,238],"one-class":[166],"problem.":[168],"develop":[170],"model":[174],"employ":[176],"dedicated":[178],"machine":[179],"learning":[180],"predict":[183],"Wikipedia's":[184],"important":[186],"flaws.":[188],"Since":[189],"setting":[193],"acquisition":[195],"significant":[197],"test":[198,259],"data":[199,260],"intricate,":[201],"we":[202,214],"analyze":[203],"effects":[205],"biased":[208],"sample":[209],"selection.":[210],"In":[211],"regard":[213],"illustrate":[215],"classifier":[217],"effectiveness":[218],"function":[221],"distribution":[225],"order":[227],"cope":[229],"unknown":[232],"(real-world)":[233],"flaw-specific":[234],"class":[235],"imbalances.":[236],"performance":[240],"evaluated":[242],"10,000":[244],"articles":[246],"have":[248],"been":[249],"tagged":[250],"ten":[253],"frequent":[255],"flaws:":[257],"provided":[258],"little":[262],"noise,":[263],"four":[264],"can":[266],"be":[267],"detected":[268],"precision":[271],"close":[272],"1.":[274]},"counts_by_year":[{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":8},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":9},{"year":2018,"cited_by_count":8},{"year":2017,"cited_by_count":10},{"year":2016,"cited_by_count":7},{"year":2015,"cited_by_count":4},{"year":2014,"cited_by_count":8},{"year":2013,"cited_by_count":5},{"year":2012,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
