{"id":"https://openalex.org/W1981739849","doi":"https://doi.org/10.1145/2766462.2767834","title":"Modeling Website Topic Cohesion at Scale to Improve Webpage Classification","display_name":"Modeling Website Topic Cohesion at Scale to Improve Webpage Classification","publication_year":2015,"publication_date":"2015-08-04","ids":{"openalex":"https://openalex.org/W1981739849","doi":"https://doi.org/10.1145/2766462.2767834","mag":"1981739849"},"language":"en","primary_location":{"id":"doi:10.1145/2766462.2767834","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2766462.2767834","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 38th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006970235","display_name":"Dhivya Eswaran","orcid":null},"institutions":[{"id":"https://openalex.org/I24676775","display_name":"Indian Institute of Technology Madras","ror":"https://ror.org/03v0r5n49","country_code":"IN","type":"facility","lineage":["https://openalex.org/I24676775"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Dhivya Eswaran","raw_affiliation_strings":["Indian Institute of Technology, Madras, Chennai, India"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Technology, Madras, Chennai, India","institution_ids":["https://openalex.org/I24676775"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102869952","display_name":"Paul N. Bennett","orcid":"https://orcid.org/0000-0002-8846-5480"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Paul N. Bennett","raw_affiliation_strings":["Microsoft Research, Redmond, WA, USA"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Redmond, WA, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5110629518","display_name":"Joseph J. Pfeiffer","orcid":null},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Joseph J. Pfeiffer","raw_affiliation_strings":["Purdue University, West Lafayette, IN, USA"],"affiliations":[{"raw_affiliation_string":"Purdue University, West Lafayette, IN, USA","institution_ids":["https://openalex.org/I219193219"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5006970235"],"corresponding_institution_ids":["https://openalex.org/I24676775"],"apc_list":null,"apc_paid":null,"fwci":0.4314,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.75742434,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"787","last_page":"790"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.991599977016449,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9848999977111816,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7849586606025696},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.7334238886833191},{"id":"https://openalex.org/keywords/hyperlink","display_name":"Hyperlink","score":0.7185136079788208},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.6531733274459839},{"id":"https://openalex.org/keywords/cohesion","display_name":"Cohesion (chemistry)","score":0.6201560497283936},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5088574886322021},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.45814794301986694},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.40655913949012756},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3848521113395691},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3293913006782532},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.32932478189468384},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.1560148000717163}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7849586606025696},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7334238886833191},{"id":"https://openalex.org/C30088001","wikidata":"https://www.wikidata.org/wiki/Q102014","display_name":"Hyperlink","level":3,"score":0.7185136079788208},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.6531733274459839},{"id":"https://openalex.org/C104054115","wikidata":"https://www.wikidata.org/wiki/Q216828","display_name":"Cohesion (chemistry)","level":2,"score":0.6201560497283936},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5088574886322021},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.45814794301986694},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.40655913949012756},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3848521113395691},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3293913006782532},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.32932478189468384},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.1560148000717163},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2766462.2767834","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2766462.2767834","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 38th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W69389495","https://openalex.org/W1502275721","https://openalex.org/W1860880244","https://openalex.org/W1987233790","https://openalex.org/W2005422315","https://openalex.org/W2012354735","https://openalex.org/W2032536435","https://openalex.org/W2090561416","https://openalex.org/W2095627566","https://openalex.org/W2105879985","https://openalex.org/W2114535528","https://openalex.org/W2126185296","https://openalex.org/W2135500808","https://openalex.org/W2141253686","https://openalex.org/W2171806843","https://openalex.org/W4285719527","https://openalex.org/W6602833465"],"related_works":["https://openalex.org/W2384033143","https://openalex.org/W2245616560","https://openalex.org/W2351804282","https://openalex.org/W2048998278","https://openalex.org/W1556894713","https://openalex.org/W1490416172","https://openalex.org/W158130761","https://openalex.org/W2588706232","https://openalex.org/W2096877906","https://openalex.org/W2084041205"],"abstract_inverted_index":{"Considerable":[0],"work":[1,28],"in":[2,45],"web":[3,15,65],"page":[4,83],"classification":[5,85],"has":[6,29],"focused":[7,31],"on":[8,32],"incorporating":[9,98],"the":[10,14,17,25,51,60,64,94,103,107],"topical":[11,61],"structure":[12,62],"of":[13,27,53,63,106,125],"(e.g.,":[16],"hyperlink":[18],"graph)":[19],"to":[20,40,58,93],"improve":[21],"prediction":[22],"accuracy.":[23],"However,":[24],"majority":[26],"primarily":[30],"relational":[33],"or":[34,44],"graph-based":[35],"methods":[36],"that":[37,87,115],"are":[38],"impractical":[39],"run":[41],"at":[42],"scale":[43],"an":[46,78],"online":[47],"environment.":[48],"This":[49],"raises":[50],"question":[52],"whether":[54],"it":[55],"is":[56],"possible":[57],"leverage":[59],"while":[66],"incurring":[67],"nearly":[68],"no":[69],"additional":[70],"prediction-time":[71],"cost.":[72],"To":[73],"this":[74],"end,":[75],"we":[76,112],"introduce":[77],"approach":[79,117],"which":[80,101],"adjusts":[81],"a":[82,90,99,123],"content-only":[84],"from":[86],"obtained":[88,96],"with":[89],"global":[91],"prior":[92,100],"posterior":[95],"by":[97],"reflects":[102],"topic":[104],"cohesion":[105],"site.":[108],"Using":[109],"ODP":[110],"data,":[111],"empirically":[113],"demonstrate":[114],"our":[116],"yields":[118],"significant":[119],"performance":[120],"increases":[121],"over":[122],"range":[124],"topics.":[126]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2016,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
