{"id":"https://openalex.org/W1973244939","doi":"https://doi.org/10.1109/icmlc.2010.5580664","title":"Web information processing and extracting","display_name":"Web information processing and extracting","publication_year":2010,"publication_date":"2010-07-01","ids":{"openalex":"https://openalex.org/W1973244939","doi":"https://doi.org/10.1109/icmlc.2010.5580664","mag":"1973244939"},"language":"en","primary_location":{"id":"doi:10.1109/icmlc.2010.5580664","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icmlc.2010.5580664","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2010 International Conference on Machine Learning and Cybernetics","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101943964","display_name":"Kai Gao","orcid":"https://orcid.org/0000-0002-6920-850X"},"institutions":[{"id":"https://openalex.org/I34155123","display_name":"Hebei University of Science and Technology","ror":"https://ror.org/05h3pkk68","country_code":"CN","type":"education","lineage":["https://openalex.org/I34155123"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Kai Gao","raw_affiliation_strings":["Department of Information Science and Engineering, Hebei University of Science and Technology, Shijiazhuang, China","Department of information science and engineering, Hebei University of Science and Technology, Shijiazhuang 050018, China"],"affiliations":[{"raw_affiliation_string":"Department of Information Science and Engineering, Hebei University of Science and Technology, Shijiazhuang, China","institution_ids":["https://openalex.org/I34155123"]},{"raw_affiliation_string":"Department of information science and engineering, Hebei University of Science and Technology, Shijiazhuang 050018, China","institution_ids":["https://openalex.org/I34155123"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081818513","display_name":"Bao-Qin Zong","orcid":null},"institutions":[{"id":"https://openalex.org/I34155123","display_name":"Hebei University of Science and Technology","ror":"https://ror.org/05h3pkk68","country_code":"CN","type":"education","lineage":["https://openalex.org/I34155123"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bao-Qin Zong","raw_affiliation_strings":["Department of Information Science and Engineering, Hebei University of Science and Technology, Shijiazhuang, China","Department of information science and engineering, Hebei University of Science and Technology, Shijiazhuang 050018, China"],"affiliations":[{"raw_affiliation_string":"Department of Information Science and Engineering, Hebei University of Science and Technology, Shijiazhuang, China","institution_ids":["https://openalex.org/I34155123"]},{"raw_affiliation_string":"Department of information science and engineering, Hebei University of Science and Technology, Shijiazhuang 050018, China","institution_ids":["https://openalex.org/I34155123"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5048801424","display_name":"Xiu-Li Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107741","display_name":"Affiliated Hospital of Chengde Medical College","ror":"https://ror.org/01bgds823","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I4210107741"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiu-Li Yang","raw_affiliation_strings":["Department of Information Technology, Hebei Tourism Vocational College, Chengde, China","Department of Information Technology, Hebei Tourism Vocational College, Chengde, 067000, China"],"affiliations":[{"raw_affiliation_string":"Department of Information Technology, Hebei Tourism Vocational College, Chengde, China","institution_ids":[]},{"raw_affiliation_string":"Department of Information Technology, Hebei Tourism Vocational College, Chengde, 067000, China","institution_ids":["https://openalex.org/I4210107741"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101943964"],"corresponding_institution_ids":["https://openalex.org/I34155123"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.11973665,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"2350","last_page":"2355"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9785000085830688,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9739000201225281,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8350775241851807},{"id":"https://openalex.org/keywords/crawling","display_name":"Crawling","score":0.8021605014801025},{"id":"https://openalex.org/keywords/web-crawler","display_name":"Web crawler","score":0.7493672370910645},{"id":"https://openalex.org/keywords/search-engine","display_name":"Search engine","score":0.6720988750457764},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6388067007064819},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.5607807636260986},{"id":"https://openalex.org/keywords/search-analytics","display_name":"Search analytics","score":0.5551615953445435},{"id":"https://openalex.org/keywords/spamdexing","display_name":"Spamdexing","score":0.4953731298446655},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.4765385091304779},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.4413600265979767},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4277230501174927},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3869645595550537},{"id":"https://openalex.org/keywords/web-search-query","display_name":"Web search query","score":0.34959888458251953}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8350775241851807},{"id":"https://openalex.org/C100368936","wikidata":"https://www.wikidata.org/wiki/Q1411725","display_name":"Crawling","level":2,"score":0.8021605014801025},{"id":"https://openalex.org/C13743948","wikidata":"https://www.wikidata.org/wiki/Q45842","display_name":"Web crawler","level":2,"score":0.7493672370910645},{"id":"https://openalex.org/C97854310","wikidata":"https://www.wikidata.org/wiki/Q19541","display_name":"Search engine","level":2,"score":0.6720988750457764},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6388067007064819},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.5607807636260986},{"id":"https://openalex.org/C14838553","wikidata":"https://www.wikidata.org/wiki/Q7441639","display_name":"Search analytics","level":4,"score":0.5551615953445435},{"id":"https://openalex.org/C13565553","wikidata":"https://www.wikidata.org/wiki/Q804206","display_name":"Spamdexing","level":5,"score":0.4953731298446655},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.4765385091304779},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.4413600265979767},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4277230501174927},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3869645595550537},{"id":"https://openalex.org/C164120249","wikidata":"https://www.wikidata.org/wiki/Q995982","display_name":"Web search query","level":3,"score":0.34959888458251953},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C105702510","wikidata":"https://www.wikidata.org/wiki/Q514","display_name":"Anatomy","level":1,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icmlc.2010.5580664","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icmlc.2010.5580664","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2010 International Conference on Machine Learning and Cybernetics","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W1489992655","https://openalex.org/W1569403765","https://openalex.org/W1965942030","https://openalex.org/W1981202432","https://openalex.org/W2029341294","https://openalex.org/W2046325278","https://openalex.org/W2066636486","https://openalex.org/W2075239892","https://openalex.org/W2138621811","https://openalex.org/W2148212498","https://openalex.org/W2153704625","https://openalex.org/W2295141584","https://openalex.org/W2623293810","https://openalex.org/W4249823756","https://openalex.org/W4251504464","https://openalex.org/W6645485993"],"related_works":["https://openalex.org/W2566658409","https://openalex.org/W3119324922","https://openalex.org/W2352686120","https://openalex.org/W2372594123","https://openalex.org/W2358310581","https://openalex.org/W2964752624","https://openalex.org/W2026132847","https://openalex.org/W4385695127","https://openalex.org/W2137810919","https://openalex.org/W1770021664"],"abstract_inverted_index":{"With":[0],"the":[1,5,19,23,31,67,73,77,108,111,120],"rapid":[2],"growth":[3],"of":[4,66,69,110],"web,":[6],"search":[7,33,43,64],"engine":[8,34,44],"has":[9],"been":[10],"an":[11],"important":[12],"tool":[13],"to":[14,22],"retrieve":[15],"relevant":[16],"information":[17,71,79,92],"from":[18],"Internet.":[20],"Due":[21],"limited":[24],"bandwidth,":[25],"storage":[26],"and":[27,81,97,113],"some":[28,39,88,114],"other":[29],"limitations,":[30],"general":[32,63],"is":[35,46,55,83],"not":[36],"suitable":[37,109],"for":[38],"situations.":[40],"A":[41],"topical":[42],"which":[45],"focused":[47,53],"on":[48,72,90,100],"collecting":[49],"domain-specific":[50],"issues":[51],"by":[52],"crawling":[54],"needed.":[56],"It":[57],"can":[58],"provide":[59],"higher":[60],"accuracy":[61],"than":[62],"because":[65],"lack":[68],"irrelevant":[70],"domain":[74],"collection,":[75],"so":[76],"web":[78,91],"processing":[80],"extracting":[82,98],"necessary.":[84],"This":[85],"paper":[86],"presents":[87],"strategies":[89],"processing,":[93],"together":[94],"with":[95],"analyzing":[96],"based":[99],"data":[101],"content":[102],"mining.":[103],"The":[104],"experimental":[105],"result":[106],"validates":[107],"approach,":[112],"problems":[115],"are":[116],"also":[117],"present":[118],"in":[119],"end.":[121]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
