{"id":"https://openalex.org/W2790102984","doi":"https://doi.org/10.1108/dta-07-2017-0053","title":"Design and implementation of crawling algorithm to collect deep web information for web archiving","display_name":"Design and implementation of crawling algorithm to collect deep web information for web archiving","publication_year":2018,"publication_date":"2018-03-19","ids":{"openalex":"https://openalex.org/W2790102984","doi":"https://doi.org/10.1108/dta-07-2017-0053","mag":"2790102984"},"language":"en","primary_location":{"id":"doi:10.1108/dta-07-2017-0053","is_oa":false,"landing_page_url":"https://doi.org/10.1108/dta-07-2017-0053","pdf_url":null,"source":{"id":"https://openalex.org/S4210171756","display_name":"Data Technologies and Applications","issn_l":"2514-9288","issn":["2514-9288","2514-9318"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319811","host_organization_name":"Emerald Publishing Limited","host_organization_lineage":["https://openalex.org/P4310319811"],"host_organization_lineage_names":["Emerald Publishing Limited"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data Technologies and Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072571713","display_name":"Hyo-Jung Oh","orcid":"https://orcid.org/0000-0001-8067-2832"},"institutions":[{"id":"https://openalex.org/I80611190","display_name":"Jeonbuk National University","ror":"https://ror.org/05q92br09","country_code":"KR","type":"education","lineage":["https://openalex.org/I80611190"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Hyo-Jung Oh","raw_affiliation_strings":["Graduate School of Archives and Records Management, Chonbuk National University, Jeonju, The Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Graduate School of Archives and Records Management, Chonbuk National University, Jeonju, The Republic of Korea","institution_ids":["https://openalex.org/I80611190"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043309725","display_name":"Dong-Hyun Won","orcid":null},"institutions":[{"id":"https://openalex.org/I80611190","display_name":"Jeonbuk National University","ror":"https://ror.org/05q92br09","country_code":"KR","type":"education","lineage":["https://openalex.org/I80611190"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Dong-Hyun Won","raw_affiliation_strings":["Center for Disaster Safety Information, Chonbuk National University, Jeonju, The Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Center for Disaster Safety Information, Chonbuk National University, Jeonju, The Republic of Korea","institution_ids":["https://openalex.org/I80611190"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047456872","display_name":"Chonghyuck Kim","orcid":null},"institutions":[{"id":"https://openalex.org/I80611190","display_name":"Jeonbuk National University","ror":"https://ror.org/05q92br09","country_code":"KR","type":"education","lineage":["https://openalex.org/I80611190"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Chonghyuck Kim","raw_affiliation_strings":["Department of English Language and Literature, Chonbuk National University, Jeonju, The Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Department of English Language and Literature, Chonbuk National University, Jeonju, The Republic of Korea","institution_ids":["https://openalex.org/I80611190"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067892905","display_name":"Sung-Hee Park","orcid":"https://orcid.org/0000-0002-4743-2551"},"institutions":[{"id":"https://openalex.org/I80611190","display_name":"Jeonbuk National University","ror":"https://ror.org/05q92br09","country_code":"KR","type":"education","lineage":["https://openalex.org/I80611190"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Sung-Hee Park","raw_affiliation_strings":["Physical Medicine and Rehabilitation, Chonbuk National University, Jeonju, The Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Physical Medicine and Rehabilitation, Chonbuk National University, Jeonju, The Republic of Korea","institution_ids":["https://openalex.org/I80611190"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100329384","display_name":"Yong Kim","orcid":"https://orcid.org/0000-0002-7181-5851"},"institutions":[{"id":"https://openalex.org/I80611190","display_name":"Jeonbuk National University","ror":"https://ror.org/05q92br09","country_code":"KR","type":"education","lineage":["https://openalex.org/I80611190"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Yong Kim","raw_affiliation_strings":["Department of Library and information Science, Chonbuk National University, Jeonju, The Republic of Korea"],"affiliations":[{"raw_affiliation_string":"Department of Library and information Science, Chonbuk National University, Jeonju, The Republic of Korea","institution_ids":["https://openalex.org/I80611190"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5072571713"],"corresponding_institution_ids":["https://openalex.org/I80611190"],"apc_list":null,"apc_paid":null,"fwci":1.9635,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.8950835,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"52","issue":"2","first_page":"266","last_page":"277"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9842000007629395,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.9761999845504761,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.8632878661155701},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8028866052627563},{"id":"https://openalex.org/keywords/web-crawler","display_name":"Web crawler","score":0.7526234984397888},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.6883691549301147},{"id":"https://openalex.org/keywords/static-web-page","display_name":"Static web page","score":0.6348795890808105},{"id":"https://openalex.org/keywords/focused-crawler","display_name":"Focused crawler","score":0.5952784419059753},{"id":"https://openalex.org/keywords/web-modeling","display_name":"Web modeling","score":0.5333680510520935},{"id":"https://openalex.org/keywords/document-object-model","display_name":"Document Object Model","score":0.5286807417869568},{"id":"https://openalex.org/keywords/scripting-language","display_name":"Scripting language","score":0.5252327919006348},{"id":"https://openalex.org/keywords/web-navigation","display_name":"Web navigation","score":0.5248032808303833},{"id":"https://openalex.org/keywords/web-development","display_name":"Web development","score":0.5189516544342041},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5164477825164795},{"id":"https://openalex.org/keywords/web-design","display_name":"Web design","score":0.4825993776321411},{"id":"https://openalex.org/keywords/web-search-engine","display_name":"Web search engine","score":0.46042323112487793},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.08282122015953064}],"concepts":[{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.8632878661155701},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8028866052627563},{"id":"https://openalex.org/C13743948","wikidata":"https://www.wikidata.org/wiki/Q45842","display_name":"Web crawler","level":2,"score":0.7526234984397888},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.6883691549301147},{"id":"https://openalex.org/C173576120","wikidata":"https://www.wikidata.org/wiki/Q2641220","display_name":"Static web page","level":4,"score":0.6348795890808105},{"id":"https://openalex.org/C73340581","wikidata":"https://www.wikidata.org/wiki/Q5463958","display_name":"Focused crawler","level":5,"score":0.5952784419059753},{"id":"https://openalex.org/C130436687","wikidata":"https://www.wikidata.org/wiki/Q7978591","display_name":"Web modeling","level":3,"score":0.5333680510520935},{"id":"https://openalex.org/C137922610","wikidata":"https://www.wikidata.org/wiki/Q2093","display_name":"Document Object Model","level":3,"score":0.5286807417869568},{"id":"https://openalex.org/C61423126","wikidata":"https://www.wikidata.org/wiki/Q187432","display_name":"Scripting language","level":2,"score":0.5252327919006348},{"id":"https://openalex.org/C61096286","wikidata":"https://www.wikidata.org/wiki/Q7978592","display_name":"Web navigation","level":3,"score":0.5248032808303833},{"id":"https://openalex.org/C79373723","wikidata":"https://www.wikidata.org/wiki/Q386275","display_name":"Web development","level":3,"score":0.5189516544342041},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5164477825164795},{"id":"https://openalex.org/C521306242","wikidata":"https://www.wikidata.org/wiki/Q190637","display_name":"Web design","level":3,"score":0.4825993776321411},{"id":"https://openalex.org/C521815418","wikidata":"https://www.wikidata.org/wiki/Q4182287","display_name":"Web search engine","level":4,"score":0.46042323112487793},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.08282122015953064}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1108/dta-07-2017-0053","is_oa":false,"landing_page_url":"https://doi.org/10.1108/dta-07-2017-0053","pdf_url":null,"source":{"id":"https://openalex.org/S4210171756","display_name":"Data Technologies and Applications","issn_l":"2514-9288","issn":["2514-9288","2514-9318"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319811","host_organization_name":"Emerald Publishing Limited","host_organization_lineage":["https://openalex.org/P4310319811"],"host_organization_lineage_names":["Emerald Publishing Limited"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data Technologies and Applications","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4399999976158142,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W1964698713","https://openalex.org/W1973094124","https://openalex.org/W1996195260","https://openalex.org/W2013970953","https://openalex.org/W2125969310","https://openalex.org/W2170188121","https://openalex.org/W2344672301","https://openalex.org/W2401815318","https://openalex.org/W2559989503","https://openalex.org/W2804548096","https://openalex.org/W6685116542"],"related_works":["https://openalex.org/W182781187","https://openalex.org/W4239816769","https://openalex.org/W2277785728","https://openalex.org/W4312370889","https://openalex.org/W4205141839","https://openalex.org/W2078731629","https://openalex.org/W2941499861","https://openalex.org/W162034974","https://openalex.org/W2790102984","https://openalex.org/W2314212588"],"abstract_inverted_index":{"Purpose":[0],"The":[1,54,163,220],"purpose":[2],"of":[3,11,71,216,234],"this":[4,72,103,191],"paper":[5],"is":[6,74,184,223,239],"to":[7,36,171,174,186,194,209,241],"describe":[8],"the":[9,25,42,61,69,77,87,91,95,119,125,145,151,155,230],"development":[10],"an":[12,34,226],"algorithm":[13,35,62,97,192,222],"for":[14],"realizing":[15],"web":[16,38,43,56,120,126,146,156,188,197],"crawlers":[17],"that":[18,75],"automatically":[19],"collect":[20,37,99,141,187],"dynamically":[21],"generated":[22],"webpages":[23,47,101,143],"from":[24],"deep":[26,65,100,142,167,196],"web.":[27],"Design/methodology/approach":[28],"This":[29,122,203],"study":[30,73,123,204],"proposes":[31],"and":[32,182],"develops":[33],"information":[39,178],"as":[40,52,84,111,134,225,244],"if":[41,76,144,154],"crawler":[44,57],"gathers":[45],"static":[46],"by":[48,63,130],"managing":[49],"script":[50,85,110,136,159,200,213],"commands":[51],"links.":[53,245],"proposed":[55,96,221],"actually":[58],"experiments":[59],"with":[60,212],"collecting":[64],"webpages.":[66],"Findings":[67],"Among":[68],"findings":[70],"actual":[78],"crawling":[79],"process":[80],"provides":[81],"search":[82],"results":[83,165],"pages,":[86],"outcome":[88],"only":[89],"collects":[90],"first":[92,117],"page.":[93],"However,":[94,190],"can":[98],"in":[102],"case.":[104],"Research":[105],"limitations/implications":[106],"To":[107],"use":[108],"a":[109,112,114,135,206],"link,":[113],"human":[115],"must":[116],"analyze":[118],"document.":[121],"uses":[124],"browser":[127,147],"object":[128,148],"provided":[129],"Microsoft":[131],"Visual":[132],"Studio":[133],"launcher,":[137],"so":[138],"it":[139,183],"cannot":[140,149],"launch":[150],"script,":[152],"or":[153],"document":[157],"contains":[158],"errors.":[160],"Practical":[161],"implications":[162],"research":[164],"show":[166],"webs":[168],"are":[169],"estimated":[170],"have":[172],"450":[173],"550":[175],"times":[176],"more":[177],"than":[179],"surface":[180],"webpages,":[181],"difficult":[185],"documents.":[189],"helps":[193],"enable":[195],"collection":[198],"through":[199],"runs.":[201],"Originality/value":[202],"presents":[205],"new":[207],"method":[208],"be":[210],"utilized":[211],"links":[214],"instead":[215],"adopting":[217],"previous":[218],"keywords.":[219],"available":[224],"ordinary":[227],"URL.":[228],"From":[229],"conducted":[231],"experiment,":[232],"analysis":[233],"scripts":[235],"on":[236],"individual":[237],"websites":[238],"needed":[240],"employ":[242],"them":[243]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
