{"id":"https://openalex.org/W2063070994","doi":"https://doi.org/10.1145/1242572.1242736","title":"Classifying web sites","display_name":"Classifying web sites","publication_year":2007,"publication_date":"2007-05-08","ids":{"openalex":"https://openalex.org/W2063070994","doi":"https://doi.org/10.1145/1242572.1242736","mag":"2063070994"},"language":"en","primary_location":{"id":"doi:10.1145/1242572.1242736","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1242572.1242736","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 16th international conference on World Wide Web","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068628125","display_name":"Christoph Lindemann","orcid":null},"institutions":[{"id":"https://openalex.org/I926574661","display_name":"Leipzig University","ror":"https://ror.org/03s7gtk40","country_code":"DE","type":"education","lineage":["https://openalex.org/I926574661"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Christoph Lindemann","raw_affiliation_strings":["University of Leipzig"],"affiliations":[{"raw_affiliation_string":"University of Leipzig","institution_ids":["https://openalex.org/I926574661"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5085154909","display_name":"Lars Littig","orcid":null},"institutions":[{"id":"https://openalex.org/I926574661","display_name":"Leipzig University","ror":"https://ror.org/03s7gtk40","country_code":"DE","type":"education","lineage":["https://openalex.org/I926574661"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Lars Littig","raw_affiliation_strings":["University of Leipzig"],"affiliations":[{"raw_affiliation_string":"University of Leipzig","institution_ids":["https://openalex.org/I926574661"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5068628125"],"corresponding_institution_ids":["https://openalex.org/I926574661"],"apc_list":null,"apc_paid":null,"fwci":3.165,"has_fulltext":false,"cited_by_count":17,"citation_normalized_percentile":{"value":0.92792159,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1143","last_page":"1144"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9944999814033508,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13976","display_name":"Web visibility and informetrics","score":0.9930999875068665,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8092864155769348},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.6089983582496643},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.5997987985610962},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5505167245864868},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.5048320889472961},{"id":"https://openalex.org/keywords/web-mining","display_name":"Web mining","score":0.4867265522480011},{"id":"https://openalex.org/keywords/web-site","display_name":"Web site","score":0.47773975133895874},{"id":"https://openalex.org/keywords/data-web","display_name":"Data Web","score":0.46468591690063477},{"id":"https://openalex.org/keywords/web-modeling","display_name":"Web modeling","score":0.44193029403686523},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.35108551383018494},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3212256133556366}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8092864155769348},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.6089983582496643},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.5997987985610962},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5505167245864868},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.5048320889472961},{"id":"https://openalex.org/C197046077","wikidata":"https://www.wikidata.org/wiki/Q785337","display_name":"Web mining","level":3,"score":0.4867265522480011},{"id":"https://openalex.org/C2984519610","wikidata":"https://www.wikidata.org/wiki/Q35127","display_name":"Web site","level":3,"score":0.47773975133895874},{"id":"https://openalex.org/C162005631","wikidata":"https://www.wikidata.org/wiki/Q54837","display_name":"Data Web","level":3,"score":0.46468591690063477},{"id":"https://openalex.org/C130436687","wikidata":"https://www.wikidata.org/wiki/Q7978591","display_name":"Web modeling","level":3,"score":0.44193029403686523},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.35108551383018494},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3212256133556366},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1242572.1242736","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1242572.1242736","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 16th international conference on World Wide Web","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":3,"referenced_works":["https://openalex.org/W2050028965","https://openalex.org/W2065168033","https://openalex.org/W2092906664"],"related_works":["https://openalex.org/W132269582","https://openalex.org/W2945931694","https://openalex.org/W2622968908","https://openalex.org/W3174726539","https://openalex.org/W2187012497","https://openalex.org/W2182629206","https://openalex.org/W2134078223","https://openalex.org/W2384444869","https://openalex.org/W2744329849","https://openalex.org/W144381034"],"abstract_inverted_index":{"In":[0],"this":[1,67],"paper,":[2],"we":[3],"present":[4],"a":[5,48,58,70],"novel":[6],"method":[7,15],"for":[8,32,96],"the":[9,37,97],"classification":[10,61,99],"of":[11,21,36,42,50,94,100],"Web":[12,22,43,51,76,87,102],"sites.":[13,44,103],"This":[14],"exploits":[16],"both":[17],"structure":[18],"and":[19,83],"content":[20],"sites":[23,52,77],"in":[24],"order":[25],"to":[26],"discern":[27],"their":[28],"functionality.":[29],"It":[30],"allows":[31],"distinguishing":[33],"between":[34],"eight":[35],"most":[38],"relevant":[39],"functional":[40],"classes":[41],"We":[45,65],"show":[46],"that":[47],"pre-classification":[49],"utilizing":[53],"structural":[54],"properties":[55],"considerably":[56],"improves":[57],"subsequent":[59],"textual":[60],"with":[62,78],"standard":[63],"techniques.":[64],"evaluate":[66],"approach":[68,90],"on":[69],"dataset":[71],"comprising":[72],"more":[73],"than":[74],"16,000":[75],"about":[79],"20":[80],"million":[81,85],"crawled":[82],"100":[84],"known":[86],"pages.":[88],"Our":[89],"achieves":[91],"an":[92],"accuracy":[93],"92%":[95],"coarse-grained":[98],"these":[101]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":2},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":3},{"year":2012,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
