{"id":"https://openalex.org/W2038917359","doi":"https://doi.org/10.1145/2499178.2499196","title":"Exploiting Forum Thread Structures to Improve Thread Clustering","display_name":"Exploiting Forum Thread Structures to Improve Thread Clustering","publication_year":2013,"publication_date":"2013-09-29","ids":{"openalex":"https://openalex.org/W2038917359","doi":"https://doi.org/10.1145/2499178.2499196","mag":"2038917359"},"language":"en","primary_location":{"id":"doi:10.1145/2499178.2499196","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2499178.2499196","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2013 Conference on the Theory of Information Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002612793","display_name":"Kumaresh Pattabiraman","orcid":null},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Kumaresh Pattabiraman","raw_affiliation_strings":["University of Illinois, Urbana-Champaign, Dept. of Computer Science"],"affiliations":[{"raw_affiliation_string":"University of Illinois, Urbana-Champaign, Dept. of Computer Science","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001811638","display_name":"Parikshit Sondhi","orcid":null},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Parikshit Sondhi","raw_affiliation_strings":["University of Illinois, Urbana-Champaign, Dept. of Computer Science"],"affiliations":[{"raw_affiliation_string":"University of Illinois, Urbana-Champaign, Dept. of Computer Science","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5028518494","display_name":"ChengXiang Zhai","orcid":"https://orcid.org/0000-0002-6434-3702"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"ChengXiang Zhai","raw_affiliation_strings":["University of Illinois, Urbana-Champaign, Dept. of Computer Science"],"affiliations":[{"raw_affiliation_string":"University of Illinois, Urbana-Champaign, Dept. of Computer Science","institution_ids":["https://openalex.org/I157725225"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5002612793"],"corresponding_institution_ids":["https://openalex.org/I157725225"],"apc_list":null,"apc_paid":null,"fwci":3.2358,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.92824775,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"64","last_page":"71"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13274","display_name":"Expert finding and Q&A systems","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13274","display_name":"Expert finding and Q&A systems","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9922000169754028,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9916999936103821,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.8081760406494141},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.8013445734977722},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.788189709186554},{"id":"https://openalex.org/keywords/weighting","display_name":"Weighting","score":0.45576462149620056},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.44198957085609436},{"id":"https://openalex.org/keywords/hierarchical-clustering","display_name":"Hierarchical clustering","score":0.4256497621536255},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.363663911819458},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.24336892366409302},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.09815052151679993}],"concepts":[{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.8081760406494141},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.8013445734977722},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.788189709186554},{"id":"https://openalex.org/C183115368","wikidata":"https://www.wikidata.org/wiki/Q856577","display_name":"Weighting","level":2,"score":0.45576462149620056},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.44198957085609436},{"id":"https://openalex.org/C92835128","wikidata":"https://www.wikidata.org/wiki/Q1277447","display_name":"Hierarchical clustering","level":3,"score":0.4256497621536255},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.363663911819458},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.24336892366409302},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.09815052151679993},{"id":"https://openalex.org/C126838900","wikidata":"https://www.wikidata.org/wiki/Q77604","display_name":"Radiology","level":1,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2499178.2499196","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2499178.2499196","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2013 Conference on the Theory of Information Retrieval","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320332467","display_name":"U.S. Air Force","ror":"https://ror.org/006gmme17"},{"id":"https://openalex.org/F4320333591","display_name":"Multidisciplinary University Research Initiative","ror":null},{"id":"https://openalex.org/F4320338279","display_name":"Air Force Office of Scientific Research","ror":"https://ror.org/011e9bt93"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W8870360","https://openalex.org/W177764760","https://openalex.org/W1576083876","https://openalex.org/W1620204465","https://openalex.org/W1651093245","https://openalex.org/W1938740620","https://openalex.org/W1998312906","https://openalex.org/W2019071678","https://openalex.org/W2019973434","https://openalex.org/W2049633694","https://openalex.org/W2056756258","https://openalex.org/W2085030399","https://openalex.org/W2086662461","https://openalex.org/W2107743791","https://openalex.org/W2112247328","https://openalex.org/W2126226055","https://openalex.org/W2127218421","https://openalex.org/W2134557008","https://openalex.org/W2161744072","https://openalex.org/W2299467264","https://openalex.org/W2340309946","https://openalex.org/W2482589566","https://openalex.org/W3149154678","https://openalex.org/W4233135949","https://openalex.org/W4285719527","https://openalex.org/W6600367688","https://openalex.org/W6633894697","https://openalex.org/W6637231022","https://openalex.org/W7048738093"],"related_works":["https://openalex.org/W2180954594","https://openalex.org/W2052835778","https://openalex.org/W2049003611","https://openalex.org/W2127804977","https://openalex.org/W2108418243","https://openalex.org/W164103134","https://openalex.org/W2787352659","https://openalex.org/W4206560911","https://openalex.org/W1970611213","https://openalex.org/W4372260270"],"abstract_inverted_index":{"Automated":[0],"clustering":[1,31,46,79,106,204],"of":[2,26,38,55,76,102,128,156,194],"threads":[3,94,103],"within":[4],"and":[5,14,21,52,86,95,147,191],"across":[6],"web":[7],"forums":[8,143],"will":[9],"greatly":[10],"benefit":[11],"both":[12,145,188],"users":[13],"forum":[15,47,56,67,93,121],"administrators":[16],"in":[17,119],"efficiently":[18],"seeking,":[19],"managing,":[20],"integrating":[22],"the":[23,49,77,100,117,134,154,157,167],"huge":[24],"volume":[25],"content":[27],"being":[28],"generated.":[29],"While":[30],"has":[32,42],"been":[33,43],"studied":[34],"for":[35,113,144,163,187],"other":[36],"types":[37],"data,":[39],"little":[40],"work":[41],"done":[44],"on":[45,136],"threads;":[48],"informal":[50],"nature":[51],"special":[53],"structure":[54,101],"data":[57,137],"make":[58],"it":[59],"interesting":[60],"to":[61,64,91,98,104,116,123,199],"study":[62,96],"how":[63,97],"effectively":[65],"cluster":[66,92],"threads.":[68],"In":[69,177],"this":[70,164],"paper,":[71],"we":[72],"apply":[73],"three":[74,110,140],"state":[75,155],"art":[78,158],"methods":[80,112,135,159],"(i.e.,":[81],"hierarchical":[82],"agglomerative":[83],"clustering,":[84],"k-Means,":[85],"probabilistic":[87],"latent":[88],"semantic":[89],"analysis)":[90],"leverage":[99],"improve":[105],"accuracy.":[107],"We":[108,131],"propose":[109],"different":[111,141],"assigning":[114],"weights":[115,186],"posts":[118,190,193],"a":[120,129,179,195,202],"thread":[122,175,196],"achieve":[124],"more":[125],"accurate":[126],"representation":[127],"thread.":[130],"evaluate":[132],"all":[133],"collected":[138],"from":[139],"Linux":[142],"within-forum":[146],"across-forum":[148],"clustering.":[149],"Our":[150],"results":[151],"show":[152],"that":[153,183],"perform":[160],"reasonably":[161],"well":[162],"task,":[165],"but":[166],"performance":[168],"can":[169],"be":[170],"further":[171],"improved":[172],"by":[173],"exploiting":[174],"structures.":[176],"particular,":[178],"parabolic":[180],"weighting":[181],"method":[182],"assigns":[184],"higher":[185],"beginning":[189],"end":[192],"is":[197],"shown":[198],"consistently":[200],"outperform":[201],"standard":[203],"method.":[205]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2014,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
