{"id":"https://openalex.org/W2570716469","doi":"https://doi.org/10.1109/besc.2016.7804487","title":"DGWC: Distributed and generic web crawler for online information extraction","display_name":"DGWC: Distributed and generic web crawler for online information extraction","publication_year":2016,"publication_date":"2016-11-01","ids":{"openalex":"https://openalex.org/W2570716469","doi":"https://doi.org/10.1109/besc.2016.7804487","mag":"2570716469"},"language":"en","primary_location":{"id":"doi:10.1109/besc.2016.7804487","is_oa":false,"landing_page_url":"https://doi.org/10.1109/besc.2016.7804487","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 International Conference on Behavioral, Economic and Socio-cultural Computing (BESC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100388597","display_name":"Lu Zhang","orcid":"https://orcid.org/0000-0002-0385-2327"},"institutions":[{"id":"https://openalex.org/I137056471","display_name":"Nanjing University of Finance and Economics","ror":"https://ror.org/031y8am81","country_code":"CN","type":"education","lineage":["https://openalex.org/I137056471"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lu Zhang","raw_affiliation_strings":["Jiangsu Provincial Key Lab. of E-Business, Nanjing University of Finance and Economics, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Jiangsu Provincial Key Lab. of E-Business, Nanjing University of Finance and Economics, China","institution_ids":["https://openalex.org/I137056471"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038173672","display_name":"Zhan Bu","orcid":"https://orcid.org/0000-0002-7582-8203"},"institutions":[{"id":"https://openalex.org/I137056471","display_name":"Nanjing University of Finance and Economics","ror":"https://ror.org/031y8am81","country_code":"CN","type":"education","lineage":["https://openalex.org/I137056471"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhan Bu","raw_affiliation_strings":["Nanjing University of Finance and Economics, Nanjing, Jiangsu, CN"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Nanjing University of Finance and Economics, Nanjing, Jiangsu, CN","institution_ids":["https://openalex.org/I137056471"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063565261","display_name":"Zhiang Wu","orcid":"https://orcid.org/0000-0002-0591-1861"},"institutions":[{"id":"https://openalex.org/I137056471","display_name":"Nanjing University of Finance and Economics","ror":"https://ror.org/031y8am81","country_code":"CN","type":"education","lineage":["https://openalex.org/I137056471"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiang Wu","raw_affiliation_strings":["Jiangsu Provincial Key Lab. of E-Business, Nanjing University of Finance and Economics, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Jiangsu Provincial Key Lab. of E-Business, Nanjing University of Finance and Economics, China","institution_ids":["https://openalex.org/I137056471"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5060815634","display_name":"Jie Cao","orcid":"https://orcid.org/0000-0002-9942-3243"},"institutions":[{"id":"https://openalex.org/I137056471","display_name":"Nanjing University of Finance and Economics","ror":"https://ror.org/031y8am81","country_code":"CN","type":"education","lineage":["https://openalex.org/I137056471"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jie Cao","raw_affiliation_strings":["Jiangsu Provincial Key Lab. of E-Business, Nanjing University of Finance and Economics, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Jiangsu Provincial Key Lab. of E-Business, Nanjing University of Finance and Economics, China","institution_ids":["https://openalex.org/I137056471"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.7614,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.92665358,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"2016","issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9800000190734863,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.9472000002861023,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/web-crawler","display_name":"Web crawler","score":0.9459336400032043},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.861716628074646},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.7084805369377136},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.6965327262878418},{"id":"https://openalex.org/keywords/focused-crawler","display_name":"Focused crawler","score":0.6135857105255127},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.5446897745132446},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5376827120780945},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.44672736525535583},{"id":"https://openalex.org/keywords/static-web-page","display_name":"Static web page","score":0.3764185309410095},{"id":"https://openalex.org/keywords/web-navigation","display_name":"Web navigation","score":0.3472871780395508},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.23566192388534546}],"concepts":[{"id":"https://openalex.org/C13743948","wikidata":"https://www.wikidata.org/wiki/Q45842","display_name":"Web crawler","level":2,"score":0.9459336400032043},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.861716628074646},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.7084805369377136},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.6965327262878418},{"id":"https://openalex.org/C73340581","wikidata":"https://www.wikidata.org/wiki/Q5463958","display_name":"Focused crawler","level":5,"score":0.6135857105255127},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.5446897745132446},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5376827120780945},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.44672736525535583},{"id":"https://openalex.org/C173576120","wikidata":"https://www.wikidata.org/wiki/Q2641220","display_name":"Static web page","level":4,"score":0.3764185309410095},{"id":"https://openalex.org/C61096286","wikidata":"https://www.wikidata.org/wiki/Q7978592","display_name":"Web navigation","level":3,"score":0.3472871780395508},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.23566192388534546}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/besc.2016.7804487","is_oa":false,"landing_page_url":"https://doi.org/10.1109/besc.2016.7804487","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 International Conference on Behavioral, Economic and Socio-cultural Computing (BESC)","raw_type":"proceedings-article"},{"id":"mag:2745770362","is_oa":false,"landing_page_url":"http://jglobal.jst.go.jp/en/public/201702242941129787","pdf_url":null,"source":{"id":"https://openalex.org/S4306512817","display_name":"IEEE Conference Proceedings","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":"IEEE Conference Proceedings","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.6499999761581421,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W1566513354","https://openalex.org/W1977746397","https://openalex.org/W1992657934","https://openalex.org/W2012575882","https://openalex.org/W2041306792","https://openalex.org/W2084745740","https://openalex.org/W2133814403","https://openalex.org/W2160529825","https://openalex.org/W2161127713","https://openalex.org/W4255475599"],"related_works":["https://openalex.org/W4385695127","https://openalex.org/W2402678663","https://openalex.org/W2277785728","https://openalex.org/W2963706618","https://openalex.org/W2112685907","https://openalex.org/W2134614621","https://openalex.org/W3201526811","https://openalex.org/W2388966984","https://openalex.org/W2389761961","https://openalex.org/W2113184419"],"abstract_inverted_index":{"Online":[0],"information":[1],"has":[2],"become":[3,60],"important":[4],"data":[5,152],"source":[6],"to":[7,34,64,86,93,117,134],"analyze":[8],"the":[9,41,48,52,55,106,110,119,127,136,145,154],"public":[10],"opinion":[11],"and":[12,20,30,47,54,75,89,99,109,156],"behavior,":[13],"which":[14,82],"is":[15,132],"significant":[16],"for":[17],"social":[18],"management":[19],"business":[21],"decision.":[22],"Web":[23],"crawler":[24,66,78],"systems":[25],"target":[26],"at":[27],"automatically":[28],"download":[29],"parse":[31,90],"web":[32,45,65,77,91],"pages":[33,46,92],"extract":[35,135],"expected":[36],"online":[37],"information.":[38],"However,":[39],"as":[40],"rapid":[42],"increasing":[43],"of":[44,57,143,158],"heterogeneous":[49],"page":[50,146],"structures,":[51],"performance":[53],"rules":[56],"parsing":[58,144],"have":[59],"two":[61],"serious":[62],"challenges":[63],"systems.":[67],"In":[68],"this":[69],"paper,":[70],"we":[71,104],"propose":[72],"a":[73,97,113,129],"distributed":[74],"generic":[76],"system":[79,120],"(DGWC),":[80],"in":[81,112],"spiders":[83],"are":[84],"scheduled":[85],"parallel":[87],"access":[88],"improve":[94],"performance,":[95],"utilized":[96],"shared":[98],"memory":[100],"based":[101],"database.":[102],"Furthermore,":[103],"package":[105],"spider":[107],"program":[108],"dependencies":[111],"container":[114],"called":[115],"Docker":[116],"make":[118],"easily":[121],"horizontal":[122],"scaling.":[123],"Last":[124],"but":[125],"not":[126],"least,":[128],"statistics-based":[130],"approach":[131],"proposed":[133],"main":[137],"text":[138],"using":[139],"supervised-learning":[140],"classifier":[141],"instead":[142],"structures.":[147],"Experimental":[148],"results":[149],"on":[150],"real-world":[151],"validate":[153],"efficiency":[155],"effectiveness":[157],"DGWC.":[159]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
