{"id":"https://openalex.org/W4200319214","doi":"https://doi.org/10.14704/web/v18i2/web18318","title":"Automatic Web Page Classification System with Improved Accuracy","display_name":"Automatic Web Page Classification System with Improved Accuracy","publication_year":2021,"publication_date":"2021-12-23","ids":{"openalex":"https://openalex.org/W4200319214","doi":"https://doi.org/10.14704/web/v18i2/web18318"},"language":"en","primary_location":{"id":"doi:10.14704/web/v18i2/web18318","is_oa":true,"landing_page_url":"https://doi.org/10.14704/web/v18i2/web18318","pdf_url":null,"source":{"id":"https://openalex.org/S4210195749","display_name":"Webology","issn_l":"1735-188X","issn":["1735-188X"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310318996","host_organization_name":"University of Tehran Press","host_organization_lineage":["https://openalex.org/P4310318996"],"host_organization_lineage_names":["University of Tehran Press"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Webology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.14704/web/v18i2/web18318","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5017697580","display_name":"Chait hra","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chait hra","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057948007","display_name":"G. M. Lingaraju","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dr.G.M. Lingaraju","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5028178732","display_name":"Dr.S. Jagannatha","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dr.S. Jagannatha","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5017697580"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.29699866,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"18","issue":"2","first_page":"225","last_page":"242"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9818000197410583,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9818000197410583,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9513999819755554,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9314000010490417,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.819343090057373},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.6554986238479614},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6347566843032837},{"id":"https://openalex.org/keywords/bigram","display_name":"Bigram","score":0.536104679107666},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.5082404017448425},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.4958864152431488},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.45238757133483887},{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.4397355914115906},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.37681952118873596},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3540927469730377},{"id":"https://openalex.org/keywords/trigram","display_name":"Trigram","score":0.19625860452651978}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.819343090057373},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.6554986238479614},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6347566843032837},{"id":"https://openalex.org/C108757681","wikidata":"https://www.wikidata.org/wiki/Q2773912","display_name":"Bigram","level":3,"score":0.536104679107666},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.5082404017448425},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.4958864152431488},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.45238757133483887},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.4397355914115906},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.37681952118873596},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3540927469730377},{"id":"https://openalex.org/C137546455","wikidata":"https://www.wikidata.org/wiki/Q3213474","display_name":"Trigram","level":2,"score":0.19625860452651978},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.14704/web/v18i2/web18318","is_oa":true,"landing_page_url":"https://doi.org/10.14704/web/v18i2/web18318","pdf_url":null,"source":{"id":"https://openalex.org/S4210195749","display_name":"Webology","issn_l":"1735-188X","issn":["1735-188X"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310318996","host_organization_name":"University of Tehran Press","host_organization_lineage":["https://openalex.org/P4310318996"],"host_organization_lineage_names":["University of Tehran Press"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Webology","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.14704/web/v18i2/web18318","is_oa":true,"landing_page_url":"https://doi.org/10.14704/web/v18i2/web18318","pdf_url":null,"source":{"id":"https://openalex.org/S4210195749","display_name":"Webology","issn_l":"1735-188X","issn":["1735-188X"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310318996","host_organization_name":"University of Tehran Press","host_organization_lineage":["https://openalex.org/P4310318996"],"host_organization_lineage_names":["University of Tehran Press"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Webology","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W3173084154","https://openalex.org/W2982021180","https://openalex.org/W2251497876","https://openalex.org/W2241081188","https://openalex.org/W2128567707","https://openalex.org/W159278796","https://openalex.org/W4327499987","https://openalex.org/W2011383762","https://openalex.org/W2035962958","https://openalex.org/W2146546639"],"abstract_inverted_index":{"Nowadays,":[0],"the":[1,81,92,102,121,126,143,157,164,170,177,188,195,200,211,224,232,239,243,252,255,267,269,282,290],"Internet":[2],"contain":[3],"s":[4],"a":[5,16,35,52],"wide":[6],"variety":[7,36],"of":[8,37,55,84,94,104,125,210,241,251,292],"online":[9],"documents,":[10],"making":[11],"finding":[12,242],"useful":[13,33],"information":[14,46],"about":[15],"given":[17],"subject":[18],"impossible,":[19],"as":[20,22],"well":[21],"retrieving":[23],"irrelevant":[24],"pages.":[25,67,89,128,294],"Web":[26],"document":[27,184],"and":[28,42,45,112,123,163,206],"page":[29,57,179,271],"recognition":[30],"software":[31],"is":[32,213,228],"in":[34,133,238],"fields,":[38],"including":[39],"news,":[40],"medicine,":[41],"fitness,":[43],"research,":[44],"technology.":[47],"To":[48],"enhance":[49],"search":[50],"capability,":[51],"large":[53],"number":[54],"web":[56,66,76,106,127,178,270,293],"classification":[58,70,96,103,124,218,291],"methods":[59],"have":[60,147],"been":[61],"proposed,":[62],"especially":[63],"for":[64,120,203,217],"news":[65,75,105,140],"Furthermore":[68],"existing":[69],"approaches":[71],"seek":[72],"to":[73,91,222],"distinguish":[74],"pages":[77,107],"while":[78],"still":[79],"reducing":[80],"high":[82],"dimensionality":[83],"features":[85,171,257],"derived":[86],"from":[87,138],"these":[88],"Due":[90],"lack":[93],"automated":[95],"methods,":[97],"this":[98,134],"paper":[99,135],"focuses":[100],"on":[101,109,289],"based":[108],"their":[110],"scarcity":[111],"importance.":[113],"This":[114,208],"work":[115,145,189],"will":[116,236],"establish":[117],"different":[118,204],"models":[119],"identification":[122],"The":[129,275],"data":[130],"sets":[131],"used":[132,148,216],"were":[136],"collected":[137],"popular":[139],"websites.":[141],"In":[142,187],"research":[144],"we":[146],"BBC":[149],"dataset":[150],"that":[151,245,278],"has":[152,280],"five":[153],"predefined":[154],"categories.":[155,253],"Initially":[156],"input":[158],"source":[159],"can":[160,166,172,258,272],"be":[161,167,173,237,259,273],"preprocessed":[162],"errors":[165],"eliminated.":[168],"Then":[169,254],"extracted":[174],"depend":[175,265],"upon":[176,266],"reviews":[180],"using":[181,231,261],"Term":[182],"frequency-inverse":[183],"frequency":[185],"vectorization.":[186],"2225":[190],"documents":[191],"are":[192,246],"represented":[193],"with":[194,249],"15286":[196],"features,":[197],"which":[198,235,285],"represents":[199],"tf-idf":[201],"score":[202],"unigrams":[205],"bigrams.":[207],"type":[209],"representation":[212],"not":[214],"only":[215],"task":[219,240],"also":[220],"helpful":[221],"analyze":[223],"dataset.":[225],"Feature":[226],"selection":[227],"done":[229],"by":[230],"chi-squared":[233,262],"test":[234],"terms":[244],"most":[247],"correlated":[248],"each":[250],"pointed":[256],"selected":[260],"test.":[263],"Finally":[264],"classifier":[268],"classified.":[274],"results":[276],"showed":[277],"list":[279],"obtained":[281],"highest":[283],"percentage,":[284],"reflect":[286],"its":[287],"effectiveness":[288]},"counts_by_year":[],"updated_date":"2026-05-21T09:19:25.381259","created_date":"2025-10-10T00:00:00"}
