{"id":"https://openalex.org/W1998224519","doi":"https://doi.org/10.1109/icsmc.2011.6083759","title":"An improved topic relevance algorithm for focused crawling","display_name":"An improved topic relevance algorithm for focused crawling","publication_year":2011,"publication_date":"2011-10-01","ids":{"openalex":"https://openalex.org/W1998224519","doi":"https://doi.org/10.1109/icsmc.2011.6083759","mag":"1998224519"},"language":"en","primary_location":{"id":"doi:10.1109/icsmc.2011.6083759","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icsmc.2011.6083759","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2011 IEEE International Conference on Systems, Man, and Cybernetics","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5055393039","display_name":"Hongwei Hao","orcid":"https://orcid.org/0000-0003-2019-516X"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hong-Wei Hao","raw_affiliation_strings":["Department of Computer Science, School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China","Department of Computer Science, School of Computer and Communication Engineering, University of Science and Technology Beijing, 100083, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]},{"raw_affiliation_string":"Department of Computer Science, School of Computer and Communication Engineering, University of Science and Technology Beijing, 100083, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003368114","display_name":"Cui-Xia Mu","orcid":null},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Cui-Xia Mu","raw_affiliation_strings":["Department of Computer Science, School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China","Department of Computer Science, School of Computer and Communication Engineering, University of Science and Technology Beijing, 100083, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]},{"raw_affiliation_string":"Department of Computer Science, School of Computer and Communication Engineering, University of Science and Technology Beijing, 100083, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074514262","display_name":"Xu-Cheng Yin","orcid":"https://orcid.org/0000-0003-0023-0220"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xu-Cheng Yin","raw_affiliation_strings":["Department of Computer Science, School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China","Department of Computer Science, School of Computer and Communication Engineering, University of Science and Technology Beijing, 100083, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]},{"raw_affiliation_string":"Department of Computer Science, School of Computer and Communication Engineering, University of Science and Technology Beijing, 100083, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066535698","display_name":"Li Shen","orcid":"https://orcid.org/0000-0001-6178-1427"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shen Li","raw_affiliation_strings":["Department of Computer Science, School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China","Department of Computer Science, School of Computer and Communication Engineering, University of Science and Technology Beijing, 100083, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]},{"raw_affiliation_string":"Department of Computer Science, School of Computer and Communication Engineering, University of Science and Technology Beijing, 100083, China","institution_ids":["https://openalex.org/I92403157"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100332272","display_name":"Zhibin Wang","orcid":"https://orcid.org/0000-0001-7502-2181"},"institutions":[{"id":"https://openalex.org/I92403157","display_name":"University of Science and Technology Beijing","ror":"https://ror.org/02egmk993","country_code":"CN","type":"education","lineage":["https://openalex.org/I92403157"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhi-Bin Wang","raw_affiliation_strings":["Department of Computer Science, School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China","Department of Computer Science, School of Computer and Communication Engineering, University of Science and Technology Beijing, 100083, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, School of Computer and Communication Engineering, University of Science and Technology Beijing, Beijing, China","institution_ids":["https://openalex.org/I92403157"]},{"raw_affiliation_string":"Department of Computer Science, School of Computer and Communication Engineering, University of Science and Technology Beijing, 100083, China","institution_ids":["https://openalex.org/I92403157"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5055393039"],"corresponding_institution_ids":["https://openalex.org/I92403157"],"apc_list":null,"apc_paid":null,"fwci":7.3895,"has_fulltext":false,"cited_by_count":20,"citation_normalized_percentile":{"value":0.96658926,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"41","issue":null,"first_page":"850","last_page":"855"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.972000002861023,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9668999910354614,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/tf\u2013idf","display_name":"tf\u2013idf","score":0.9292759895324707},{"id":"https://openalex.org/keywords/crawling","display_name":"Crawling","score":0.8722822666168213},{"id":"https://openalex.org/keywords/web-crawler","display_name":"Web crawler","score":0.8289909362792969},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.7976733446121216},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7899762392044067},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.69943767786026},{"id":"https://openalex.org/keywords/focused-crawler","display_name":"Focused crawler","score":0.6614469885826111},{"id":"https://openalex.org/keywords/hyperlink","display_name":"Hyperlink","score":0.6275540590286255},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.46232250332832336},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.3897656202316284},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.378673791885376},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3256470561027527},{"id":"https://openalex.org/keywords/term","display_name":"Term (time)","score":0.30363139510154724},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.2846488356590271},{"id":"https://openalex.org/keywords/static-web-page","display_name":"Static web page","score":0.06576499342918396}],"concepts":[{"id":"https://openalex.org/C81758059","wikidata":"https://www.wikidata.org/wiki/Q796584","display_name":"tf\u2013idf","level":3,"score":0.9292759895324707},{"id":"https://openalex.org/C100368936","wikidata":"https://www.wikidata.org/wiki/Q1411725","display_name":"Crawling","level":2,"score":0.8722822666168213},{"id":"https://openalex.org/C13743948","wikidata":"https://www.wikidata.org/wiki/Q45842","display_name":"Web crawler","level":2,"score":0.8289909362792969},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.7976733446121216},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7899762392044067},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.69943767786026},{"id":"https://openalex.org/C73340581","wikidata":"https://www.wikidata.org/wiki/Q5463958","display_name":"Focused crawler","level":5,"score":0.6614469885826111},{"id":"https://openalex.org/C30088001","wikidata":"https://www.wikidata.org/wiki/Q102014","display_name":"Hyperlink","level":3,"score":0.6275540590286255},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.46232250332832336},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.3897656202316284},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.378673791885376},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3256470561027527},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.30363139510154724},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.2846488356590271},{"id":"https://openalex.org/C173576120","wikidata":"https://www.wikidata.org/wiki/Q2641220","display_name":"Static web page","level":4,"score":0.06576499342918396},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C105702510","wikidata":"https://www.wikidata.org/wiki/Q514","display_name":"Anatomy","level":1,"score":0.0},{"id":"https://openalex.org/C61096286","wikidata":"https://www.wikidata.org/wiki/Q7978592","display_name":"Web navigation","level":3,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icsmc.2011.6083759","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icsmc.2011.6083759","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2011 IEEE International Conference on Systems, Man, and Cybernetics","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W120261275","https://openalex.org/W173995639","https://openalex.org/W1481372933","https://openalex.org/W1489992655","https://openalex.org/W1508704473","https://openalex.org/W1581199549","https://openalex.org/W2001832505","https://openalex.org/W2045998703","https://openalex.org/W2068632118","https://openalex.org/W2076008912","https://openalex.org/W2099585315","https://openalex.org/W2116224704","https://openalex.org/W2124673015","https://openalex.org/W2147152072","https://openalex.org/W2158997610","https://openalex.org/W2170910294","https://openalex.org/W3015720892","https://openalex.org/W6634763878","https://openalex.org/W6678701036","https://openalex.org/W6685080879"],"related_works":["https://openalex.org/W4385695127","https://openalex.org/W2375180657","https://openalex.org/W4248730791","https://openalex.org/W2026132847","https://openalex.org/W1506122440","https://openalex.org/W2137810919","https://openalex.org/W2358310581","https://openalex.org/W2352686120","https://openalex.org/W2019080882","https://openalex.org/W2372594123"],"abstract_inverted_index":{"Topic":[0],"relevance":[1,19,80,85,107],"of":[2,32,113,132,159],"pages":[3,83,124],"and":[4,82,125,134,154,161],"hyperlinks":[5],"is":[6,24,43,172],"the":[7,33,53,93,96,104,111,116,142,145,150,156,167],"key":[8],"issue":[9],"in":[10,88],"focused":[11,22,34],"crawling.":[12,143],"In":[13],"this":[14],"paper,":[15],"an":[16],"improved":[17],"topic":[18,79,84,106],"algorithm":[20,139],"for":[21,45,77],"crawling":[23,90],"proposed.":[25],"First,":[26],"we":[27,136],"implement":[28],"a":[29,37],"prototype":[30,94],"system":[31,41],"crawler":[35,97,117,146],"-":[36],"topic-specific":[38],"news":[39],"gathering":[40],"which":[42],"prepared":[44],"comparative":[46],"experiments":[47,57,91],"on":[48,58,92],"different":[49],"similarity":[50],"measures":[51],"with":[52,103],"anchor":[54],"text.":[55],"Second,":[56],"Chinese":[59],"text":[60],"corpus":[61],"show":[62],"that":[63,166,176],"using":[64,70,98,118,147,170,177],"LSI":[65,119,133,181],"(Latent":[66],"Semantic":[67],"Indexing)":[68],"outperforms":[69],"TF-IDF":[71,99,160,179],"(term":[72],"frequency-":[73],"inverse":[74],"document":[75],"frequency)":[76],"hyperlink":[78],"prediction":[81],"calculation.":[86],"Third,":[87],"real":[89],"system,":[95],"has":[100],"high":[101],"performance":[102,169],"accumulated":[105],"increasing":[108],"quickly":[109],"at":[110],"beginning":[112],"crawling,":[114],"however":[115],"can":[120],"find":[121],"more":[122],"related":[123],"tunnel":[126],"through.":[127],"Fourth,":[128],"combining":[129],"their":[130],"advantages":[131],"TF-IDF,":[135],"propose":[137],"TFIDF+LSI":[138,148,171],"to":[140,175],"guide":[141],"Last,":[144],"performs":[149],"same":[151],"crawl":[152],"task":[153],"demonstrates":[155],"combination":[157],"advantage":[158],"LSI.":[162],"The":[163],"experiment":[164],"suggests":[165],"crawler's":[168],"greatly":[173],"superior":[174],"either":[178],"or":[180],"respectively.":[182]},"counts_by_year":[{"year":2020,"cited_by_count":1},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":2},{"year":2014,"cited_by_count":2},{"year":2013,"cited_by_count":7},{"year":2012,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
