{"id":"https://openalex.org/W2079580248","doi":"https://doi.org/10.1145/1620432.1620447","title":"Near-duplicate detection for web-forums","display_name":"Near-duplicate detection for web-forums","publication_year":2009,"publication_date":"2009-01-01","ids":{"openalex":"https://openalex.org/W2079580248","doi":"https://doi.org/10.1145/1620432.1620447","mag":"2079580248"},"language":"en","primary_location":{"id":"doi:10.1145/1620432.1620447","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1620432.1620447","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2009 International Database Engineering &amp; Applications Symposium on - IDEAS '09","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031734305","display_name":"Klemens Muthmann","orcid":null},"institutions":[{"id":"https://openalex.org/I78650965","display_name":"Technische Universit\u00e4t Dresden","ror":"https://ror.org/042aqky30","country_code":"DE","type":"education","lineage":["https://openalex.org/I78650965"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Klemens Muthmann","raw_affiliation_strings":["Technische Universit\u00e4t Dresden, Dresden, Germany","Technische Universit\u00e1t Dresden, Dresden, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Technische Universit\u00e4t Dresden, Dresden, Germany","institution_ids":["https://openalex.org/I78650965"]},{"raw_affiliation_string":"Technische Universit\u00e1t Dresden, Dresden, Germany","institution_ids":["https://openalex.org/I78650965"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045754291","display_name":"Wojciech M. Barczy\u0144ski","orcid":null},"institutions":[{"id":"https://openalex.org/I4210135152","display_name":"Roland Ernst Stiftung","ror":"https://ror.org/03m629g08","country_code":"DE","type":"nonprofit","lineage":["https://openalex.org/I4210135152"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Wojciech M. Barczy\u0144ski","raw_affiliation_strings":["SAP AG, SAP Research, Dresden, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"SAP AG, SAP Research, Dresden, Germany","institution_ids":["https://openalex.org/I4210135152"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026608646","display_name":"Falk Brauer","orcid":null},"institutions":[{"id":"https://openalex.org/I4210135152","display_name":"Roland Ernst Stiftung","ror":"https://ror.org/03m629g08","country_code":"DE","type":"nonprofit","lineage":["https://openalex.org/I4210135152"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Falk Brauer","raw_affiliation_strings":["SAP AG, SAP Research, Dresden, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"SAP AG, SAP Research, Dresden, Germany","institution_ids":["https://openalex.org/I4210135152"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082325279","display_name":"Alexander L\u00f6ser","orcid":"https://orcid.org/0000-0002-4440-3261"},"institutions":[{"id":"https://openalex.org/I4577782","display_name":"Technische Universit\u00e4t Berlin","ror":"https://ror.org/03v4gjf40","country_code":"DE","type":"education","lineage":["https://openalex.org/I4577782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Alexander L\u00f6ser","raw_affiliation_strings":["Technische Universit\u00e4t Berlin, Berlin, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Technische Universit\u00e4t Berlin, Berlin, Germany","institution_ids":["https://openalex.org/I4577782"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":8.5408,"has_fulltext":false,"cited_by_count":18,"citation_normalized_percentile":{"value":0.97480539,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"142","last_page":"142"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.809695839881897},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.570450484752655},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5517529845237732},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.4318464696407318},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.38143759965896606}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.809695839881897},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.570450484752655},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5517529845237732},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.4318464696407318},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.38143759965896606}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/1620432.1620447","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1620432.1620447","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2009 International Database Engineering &amp; Applications Symposium on - IDEAS '09","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.153.9485","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.153.9485","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://cis.cs.tu-berlin.de/~aloeser/publications/main_ideas_09.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.504.8285","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.504.8285","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.rn.inf.tu-dresden.de/uploads/publikationen/muthmann_ideas09.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W1502916507","https://openalex.org/W1504211824","https://openalex.org/W1575805590","https://openalex.org/W1609518033","https://openalex.org/W1660390307","https://openalex.org/W2012833704","https://openalex.org/W2015419682","https://openalex.org/W2037858832","https://openalex.org/W2067432306","https://openalex.org/W2070333970","https://openalex.org/W2085922539","https://openalex.org/W2103243299","https://openalex.org/W2109803107","https://openalex.org/W2148885851"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W1488511360","https://openalex.org/W1781894645","https://openalex.org/W2894902932","https://openalex.org/W569715723","https://openalex.org/W188332989","https://openalex.org/W2982387199","https://openalex.org/W2000745862","https://openalex.org/W2120930843","https://openalex.org/W42013907"],"abstract_inverted_index":{"Current":[0],"forum":[1,25,64,78,121],"search":[2,20,31],"technologies":[3],"lack":[4],"the":[5,19],"ability":[6],"to":[7,14,35,41,54,93,119],"identify":[8,50],"threads":[9,17,38,122],"with":[10,29,89,123],"near-duplicate":[11,60],"content":[12],"and":[13,33,56,114],"group":[15,120],"these":[16,112],"in":[18],"results.":[21],"As":[22],"a":[23,58,71,76,124],"result,":[24],"users":[26],"are":[27,117],"overloaded":[28],"duplicated":[30],"results":[32,106],"prefer":[34],"create":[36],"new":[37,59],"without":[39],"trying":[40],"find":[42],"existing":[43],"ones.":[44],"In":[45],"this":[46,87],"paper":[47],"we":[48,109,116],"therefore":[49],"common":[51],"reasons":[52],"leading":[53],"near-duplicates":[55,98],"develop":[57],"detection":[61],"algorithm":[62,67],"for":[63,96],"threads.":[65],"The":[66],"is":[68],"implemented":[69],"using":[70],"large":[72],"case":[73],"study":[74],"of":[75,126],"real-world":[77],"serving":[79],"more":[80],"than":[81],"one":[82],"million":[83],"users.":[84],"We":[85],"compare":[86],"work":[88],"current":[90],"algorithms,":[91],"similar":[92],"[4,":[94],"5],":[95],"detecting":[97],"on":[99],"machine":[100],"generated":[101],"web":[102],"pages.":[103],"Our":[104],"preliminary":[105],"show,":[107],"that":[108,115],"significantly":[110],"outperform":[111],"algorithms":[113],"able":[118],"precision":[125],"74%.":[127]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2017,"cited_by_count":2},{"year":2015,"cited_by_count":1},{"year":2013,"cited_by_count":2},{"year":2012,"cited_by_count":5}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
