{"id":"https://openalex.org/W2145164528","doi":"https://doi.org/10.2498/cit.1001137","title":"Training a Genre Classifier for Automatic Classification of Web Pages","display_name":"Training a Genre Classifier for Automatic Classification of Web Pages","publication_year":2007,"publication_date":"2007-01-01","ids":{"openalex":"https://openalex.org/W2145164528","doi":"https://doi.org/10.2498/cit.1001137","mag":"2145164528"},"language":"en","primary_location":{"id":"doi:10.2498/cit.1001137","is_oa":true,"landing_page_url":"https://doi.org/10.2498/cit.1001137","pdf_url":"http://cit.fer.hr/index.php/CIT/article/download/1647/1351","source":{"id":"https://openalex.org/S98565333","display_name":"Journal of Computing and Information Technology","issn_l":"1330-1136","issn":["1330-1136","1846-3908"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310314807","host_organization_name":"Faculty of Electrical Engineering and Computing, University of Zagreb","host_organization_lineage":["https://openalex.org/P4310314807"],"host_organization_lineage_names":["Faculty of Electrical Engineering and Computing, University of Zagreb"],"type":"journal"},"license":"cc-by-nd","license_id":"https://openalex.org/licenses/cc-by-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Computing and Information Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"http://cit.fer.hr/index.php/CIT/article/download/1647/1351","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043406091","display_name":"Vedrana Vidulin","orcid":"https://orcid.org/0000-0002-8094-3281"},"institutions":[{"id":"https://openalex.org/I3006985408","display_name":"Jo\u017eef Stefan Institute","ror":"https://ror.org/05060sz93","country_code":"SI","type":"facility","lineage":["https://openalex.org/I3006985408"]}],"countries":["SI"],"is_corresponding":true,"raw_author_name":"Vedrana Vidulin","raw_affiliation_strings":["Jo\u017eef Stefan Institute, Ljubljana, Slovenia"],"affiliations":[{"raw_affiliation_string":"Jo\u017eef Stefan Institute, Ljubljana, Slovenia","institution_ids":["https://openalex.org/I3006985408"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056131302","display_name":"Mitja Lu\u0161trek","orcid":"https://orcid.org/0000-0003-3219-2935"},"institutions":[{"id":"https://openalex.org/I3006985408","display_name":"Jo\u017eef Stefan Institute","ror":"https://ror.org/05060sz93","country_code":"SI","type":"facility","lineage":["https://openalex.org/I3006985408"]}],"countries":["SI"],"is_corresponding":false,"raw_author_name":"Mitja Lustrek","raw_affiliation_strings":["Jo\u017eef Stefan Institute, Ljubljana, Slovenia"],"affiliations":[{"raw_affiliation_string":"Jo\u017eef Stefan Institute, Ljubljana, Slovenia","institution_ids":["https://openalex.org/I3006985408"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5043419128","display_name":"Matja\u017e Gams","orcid":"https://orcid.org/0000-0002-5747-0711"},"institutions":[{"id":"https://openalex.org/I3006985408","display_name":"Jo\u017eef Stefan Institute","ror":"https://ror.org/05060sz93","country_code":"SI","type":"facility","lineage":["https://openalex.org/I3006985408"]}],"countries":["SI"],"is_corresponding":false,"raw_author_name":"Matjaz Gams","raw_affiliation_strings":["Jo\u017eef Stefan Institute, Ljubljana, Slovenia"],"affiliations":[{"raw_affiliation_string":"Jo\u017eef Stefan Institute, Ljubljana, Slovenia","institution_ids":["https://openalex.org/I3006985408"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5043406091"],"corresponding_institution_ids":["https://openalex.org/I3006985408"],"apc_list":{"value":450,"currency":"EUR","value_usd":485},"apc_paid":{"value":450,"currency":"EUR","value_usd":485},"fwci":0.9387,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.82539752,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"15","issue":"4","first_page":"305","last_page":"305"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9904999732971191,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9757999777793884,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/c4.5-algorithm","display_name":"C4.5 algorithm","score":0.8646290302276611},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.839173436164856},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.6213854551315308},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.6117908954620361},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5465242862701416},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5148157477378845},{"id":"https://openalex.org/keywords/precision-and-recall","display_name":"Precision and recall","score":0.511957049369812},{"id":"https://openalex.org/keywords/recall","display_name":"Recall","score":0.45972344279289246},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.42734023928642273},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.42515161633491516},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3740055561065674},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3482394218444824},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3247448205947876},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.18966203927993774},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.1601639986038208}],"concepts":[{"id":"https://openalex.org/C52003472","wikidata":"https://www.wikidata.org/wiki/Q1022655","display_name":"C4.5 algorithm","level":4,"score":0.8646290302276611},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.839173436164856},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.6213854551315308},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.6117908954620361},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5465242862701416},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5148157477378845},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.511957049369812},{"id":"https://openalex.org/C100660578","wikidata":"https://www.wikidata.org/wiki/Q18733","display_name":"Recall","level":2,"score":0.45972344279289246},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.42734023928642273},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.42515161633491516},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3740055561065674},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3482394218444824},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3247448205947876},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.18966203927993774},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.1601639986038208},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C52001869","wikidata":"https://www.wikidata.org/wiki/Q812530","display_name":"Naive Bayes classifier","level":3,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.2498/cit.1001137","is_oa":true,"landing_page_url":"https://doi.org/10.2498/cit.1001137","pdf_url":"http://cit.fer.hr/index.php/CIT/article/download/1647/1351","source":{"id":"https://openalex.org/S98565333","display_name":"Journal of Computing and Information Technology","issn_l":"1330-1136","issn":["1330-1136","1846-3908"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310314807","host_organization_name":"Faculty of Electrical Engineering and Computing, University of Zagreb","host_organization_lineage":["https://openalex.org/P4310314807"],"host_organization_lineage_names":["Faculty of Electrical Engineering and Computing, University of Zagreb"],"type":"journal"},"license":"cc-by-nd","license_id":"https://openalex.org/licenses/cc-by-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Computing and Information Technology","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.2498/cit.1001137","is_oa":true,"landing_page_url":"https://doi.org/10.2498/cit.1001137","pdf_url":"http://cit.fer.hr/index.php/CIT/article/download/1647/1351","source":{"id":"https://openalex.org/S98565333","display_name":"Journal of Computing and Information Technology","issn_l":"1330-1136","issn":["1330-1136","1846-3908"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310314807","host_organization_name":"Faculty of Electrical Engineering and Computing, University of Zagreb","host_organization_lineage":["https://openalex.org/P4310314807"],"host_organization_lineage_names":["Faculty of Electrical Engineering and Computing, University of Zagreb"],"type":"journal"},"license":"cc-by-nd","license_id":"https://openalex.org/licenses/cc-by-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Computing and Information Technology","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.6600000262260437}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2145164528.pdf","grobid_xml":"https://content.openalex.org/works/W2145164528.grobid-xml"},"referenced_works_count":9,"referenced_works":["https://openalex.org/W645620088","https://openalex.org/W1559032787","https://openalex.org/W1570448133","https://openalex.org/W1571179743","https://openalex.org/W1680392829","https://openalex.org/W1995849575","https://openalex.org/W2051391088","https://openalex.org/W2146888100","https://openalex.org/W2165431734"],"related_works":["https://openalex.org/W3107474891","https://openalex.org/W3043252291","https://openalex.org/W1583355281","https://openalex.org/W2156863894","https://openalex.org/W2250493249","https://openalex.org/W2587553228","https://openalex.org/W2143404300","https://openalex.org/W3089810186","https://openalex.org/W2167946748","https://openalex.org/W3097189900"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"experiments":[3],"on":[4,28,71,79],"classifying":[5],"web":[6,17],"pages":[7,18],"by":[8,102],"genre.":[9],"Firstly,":[10],"a":[11,47,106],"corpus":[12,44],"of":[13,34,58],"1539":[14],"manually":[15],"labeled":[16],"was":[19],"prepared.":[20],"Secondly,":[21],"502":[22],"genre":[23,103],"features":[24,39],"were":[25,40,67],"selected":[26],"based":[27],"the":[29,32,35,43,72],"literature":[30],"and":[31,62,69,84],"observation":[33],"corpus.":[36],"Thirdly,":[37],"these":[38],"extracted":[41],"from":[42],"to":[45,109],"obtain":[46],"data":[48,73],"set.":[49,74],"Finally,":[50],"two":[51],"machine":[52],"learning":[53],"algorithms,":[54],"one":[55,63],"for":[56],"induction":[57],"decision":[59],"trees":[60],"(J48)":[61],"ensemble":[64,76],"algorithm":[65,77],"(bagging),":[66],"trained":[68],"tested":[70],"The":[75,97],"achieved":[78],"average":[80],"17%":[81],"better":[82,86],"precision":[83],"1.6%":[85],"accuracy,":[87],"but":[88],"slightly":[89],"worse":[90],"recall;":[91],"F-measure":[92],"did":[93],"not":[94],"vary":[95],"significantly.":[96],"results":[98],"indicate":[99],"that":[100],"classification":[101],"could":[104],"be":[105],"useful":[107],"addition":[108],"search":[110],"engines.":[111]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2014,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
