{"id":"https://openalex.org/W3186673454","doi":"https://doi.org/10.1007/s40747-021-00471-1","title":"IHWC: intelligent hidden web crawler for harvesting data in urban domains","display_name":"IHWC: intelligent hidden web crawler for harvesting data in urban domains","publication_year":2021,"publication_date":"2021-07-24","ids":{"openalex":"https://openalex.org/W3186673454","doi":"https://doi.org/10.1007/s40747-021-00471-1","mag":"3186673454"},"language":"en","primary_location":{"id":"doi:10.1007/s40747-021-00471-1","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s40747-021-00471-1","pdf_url":"https://link.springer.com/content/pdf/10.1007/s40747-021-00471-1.pdf","source":{"id":"https://openalex.org/S3035462843","display_name":"Complex & Intelligent Systems","issn_l":"2198-6053","issn":["2198-6053","2199-4536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Complex &amp; Intelligent Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://link.springer.com/content/pdf/10.1007/s40747-021-00471-1.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5091425834","display_name":"Sawroop Kaur","orcid":null},"institutions":[{"id":"https://openalex.org/I110360157","display_name":"Lovely Professional University","ror":"https://ror.org/00et6q107","country_code":"IN","type":"education","lineage":["https://openalex.org/I110360157"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Sawroop Kaur","raw_affiliation_strings":["Department of Computer Science and Engineering, Lovely Professional University, Phagwara, Punjab, India"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Lovely Professional University, Phagwara, Punjab, India","institution_ids":["https://openalex.org/I110360157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073215738","display_name":"Aman Singh","orcid":"https://orcid.org/0000-0001-6571-327X"},"institutions":[{"id":"https://openalex.org/I110360157","display_name":"Lovely Professional University","ror":"https://ror.org/00et6q107","country_code":"IN","type":"education","lineage":["https://openalex.org/I110360157"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Aman Singh","raw_affiliation_strings":["Department of Computer Science and Engineering, Lovely Professional University, Phagwara, Punjab, India"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Lovely Professional University, Phagwara, Punjab, India","institution_ids":["https://openalex.org/I110360157"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073515833","display_name":"G. Geetha","orcid":"https://orcid.org/0000-0001-7338-973X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"G. Geetha","raw_affiliation_strings":["Advanced Computing Research Society, Chennai, Tamil Nadu, India"],"affiliations":[{"raw_affiliation_string":"Advanced Computing Research Society, Chennai, Tamil Nadu, India","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5027459275","display_name":"Xiaochun Cheng","orcid":"https://orcid.org/0000-0003-0371-9646"},"institutions":[{"id":"https://openalex.org/I60488453","display_name":"Middlesex University","ror":"https://ror.org/01rv4p989","country_code":"GB","type":"education","lineage":["https://openalex.org/I60488453"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Xiaochun Cheng","raw_affiliation_strings":["Department of Computer Science, Middlesex University, London, NW4 4BT, UK"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Middlesex University, London, NW4 4BT, UK","institution_ids":["https://openalex.org/I60488453"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5091425834"],"corresponding_institution_ids":["https://openalex.org/I110360157"],"apc_list":{"value":1320,"currency":"GBP","value_usd":1619},"apc_paid":{"value":1320,"currency":"GBP","value_usd":1619},"fwci":1.7026,"has_fulltext":true,"cited_by_count":7,"citation_normalized_percentile":{"value":0.87510947,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"9","issue":"4","first_page":"3635","last_page":"3653"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9878000020980835,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9365000128746033,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/web-crawler","display_name":"Web crawler","score":0.9688708186149597},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.70917809009552},{"id":"https://openalex.org/keywords/focused-crawler","display_name":"Focused crawler","score":0.6609453558921814},{"id":"https://openalex.org/keywords/crawling","display_name":"Crawling","score":0.5706900358200073},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4880196154117584},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.47148534655570984},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4673749804496765},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4342726767063141},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.43150535225868225},{"id":"https://openalex.org/keywords/web-navigation","display_name":"Web navigation","score":0.2479892075061798},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.19843542575836182},{"id":"https://openalex.org/keywords/static-web-page","display_name":"Static web page","score":0.1269274353981018}],"concepts":[{"id":"https://openalex.org/C13743948","wikidata":"https://www.wikidata.org/wiki/Q45842","display_name":"Web crawler","level":2,"score":0.9688708186149597},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.70917809009552},{"id":"https://openalex.org/C73340581","wikidata":"https://www.wikidata.org/wiki/Q5463958","display_name":"Focused crawler","level":5,"score":0.6609453558921814},{"id":"https://openalex.org/C100368936","wikidata":"https://www.wikidata.org/wiki/Q1411725","display_name":"Crawling","level":2,"score":0.5706900358200073},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4880196154117584},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.47148534655570984},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4673749804496765},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4342726767063141},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.43150535225868225},{"id":"https://openalex.org/C61096286","wikidata":"https://www.wikidata.org/wiki/Q7978592","display_name":"Web navigation","level":3,"score":0.2479892075061798},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.19843542575836182},{"id":"https://openalex.org/C173576120","wikidata":"https://www.wikidata.org/wiki/Q2641220","display_name":"Static web page","level":4,"score":0.1269274353981018},{"id":"https://openalex.org/C105702510","wikidata":"https://www.wikidata.org/wiki/Q514","display_name":"Anatomy","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1007/s40747-021-00471-1","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s40747-021-00471-1","pdf_url":"https://link.springer.com/content/pdf/10.1007/s40747-021-00471-1.pdf","source":{"id":"https://openalex.org/S3035462843","display_name":"Complex & Intelligent Systems","issn_l":"2198-6053","issn":["2198-6053","2199-4536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Complex &amp; Intelligent Systems","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:f2294f0fdd814d4094532e0e83f081d5","is_oa":true,"landing_page_url":"https://doaj.org/article/f2294f0fdd814d4094532e0e83f081d5","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Complex & Intelligent Systems, Vol 9, Iss 4, Pp 3635-3653 (2021)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1007/s40747-021-00471-1","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s40747-021-00471-1","pdf_url":"https://link.springer.com/content/pdf/10.1007/s40747-021-00471-1.pdf","source":{"id":"https://openalex.org/S3035462843","display_name":"Complex & Intelligent Systems","issn_l":"2198-6053","issn":["2198-6053","2199-4536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Complex &amp; Intelligent Systems","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/11","display_name":"Sustainable cities and communities","score":0.6899999976158142}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3186673454.pdf","grobid_xml":"https://content.openalex.org/works/W3186673454.grobid-xml"},"referenced_works_count":42,"referenced_works":["https://openalex.org/W10239605","https://openalex.org/W343945789","https://openalex.org/W1489992655","https://openalex.org/W1539477445","https://openalex.org/W1569691642","https://openalex.org/W1601949013","https://openalex.org/W1613836731","https://openalex.org/W1674850363","https://openalex.org/W1969831559","https://openalex.org/W1987746047","https://openalex.org/W1999267468","https://openalex.org/W2007687650","https://openalex.org/W2026989907","https://openalex.org/W2046325278","https://openalex.org/W2081933028","https://openalex.org/W2104047470","https://openalex.org/W2110034127","https://openalex.org/W2113313827","https://openalex.org/W2114541504","https://openalex.org/W2131552526","https://openalex.org/W2145102654","https://openalex.org/W2151306141","https://openalex.org/W2155293951","https://openalex.org/W2155569834","https://openalex.org/W2162495634","https://openalex.org/W2166799707","https://openalex.org/W2170188121","https://openalex.org/W2170971772","https://openalex.org/W2175021080","https://openalex.org/W2326804586","https://openalex.org/W2481659138","https://openalex.org/W2565835457","https://openalex.org/W2788186240","https://openalex.org/W2792793459","https://openalex.org/W2806151536","https://openalex.org/W2884970528","https://openalex.org/W3037360538","https://openalex.org/W3046853877","https://openalex.org/W3081557211","https://openalex.org/W3113204707","https://openalex.org/W3154759521","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W2375180657","https://openalex.org/W2042034567","https://openalex.org/W2026132847","https://openalex.org/W1506122440","https://openalex.org/W2137810919","https://openalex.org/W2358310581","https://openalex.org/W2274831913","https://openalex.org/W2352686120","https://openalex.org/W2019080882","https://openalex.org/W4385695127"],"abstract_inverted_index":{"Abstract":[0],"Due":[1],"to":[2,57,74,77,101,107,112,132,214,233,254,292],"the":[3,7,25,49,54,104,110,134,148,162,168,185,197,227,244,252,256,268,280,285,299,305],"massive":[4],"size":[5,281],"of":[6,27,42,51,140,143,200,226,229,258,267,282,304],"hidden":[8,55,91,120,204,312],"web,":[9],"searching,":[10,145],"retrieving":[11],"and":[12,15,38,59,97,146,166,213,301,308,318],"mining":[13],"rich":[14,61],"high-quality":[16],"data":[17,29,125,221,238],"can":[18,208,239,250,288],"be":[19,31,75,240],"a":[20,180,190,261,283],"daunting":[21],"task.":[22],"Moreover,":[23],"with":[24,272],"presence":[26],"forms,":[28],"cannot":[30],"accessed":[32],"easily.":[33],"Forms":[34],"are":[35,72,297],"dynamic,":[36],"heterogeneous":[37],"spread":[39],"over":[40],"trillions":[41],"web":[43,56,92,106,121,164,205,313],"pages.":[44],"Significant":[45],"efforts":[46],"have":[47],"addressed":[48],"problem":[50],"tapping":[52],"into":[53],"integrate":[58],"mine":[60],"data.":[62,114,158],"Effective":[63],"techniques,":[64],"as":[65,67,138],"well":[66,154],"application":[68],"in":[69,126,155,218,222,260,279],"special":[70,85],"cases,":[71],"required":[73,100],"explored":[76],"achieve":[78,175,319],"an":[79,118,202,277],"effective":[80,203],"harvest":[81,265,302],"rate.":[82],"One":[83,225],"such":[84,137],"area":[86],"is":[87,94,99,130,232,270],"atmospheric":[88],"science,":[89],"where":[90],"crawling":[93],"least":[95],"implemented,":[96],"crawler":[98,122,151,160,194,206,249,269,287],"crawl":[102],"through":[103],"huge":[105],"narrow":[108],"down":[109],"search":[111,255],"specific":[113,262],"In":[115],"this":[116],"study,":[117],"intelligent":[119],"for":[123,179,189,242,246],"harvesting":[124],"urban":[127],"domains":[128,212],"(IHWC)":[129],"implemented":[131],"address":[133],"relative":[135],"problems":[136],"classification":[139],"domains,":[141],"prevention":[142],"exhaustive":[144],"prioritizing":[147],"URLs.":[149],"The":[150,159,193,248,264],"also":[152],"performs":[153],"curating":[156],"pollution-related":[157],"targets":[161],"relevant":[163],"pages":[165],"discards":[167],"irrelevant":[169],"by":[170],"implementing":[171],"rejection":[172],"rules.":[173],"To":[174],"more":[176],"accurate":[177],"results":[178,296],"focused":[181],"crawl,":[182],"ICHW":[183],"crawls":[184],"websites":[186],"on":[187,210],"priority":[188],"given":[191],"topic.":[192],"has":[195],"fulfilled":[196],"dual":[198],"objective":[199],"developing":[201],"that":[207],"focus":[209],"diverse":[211],"check":[215],"its":[216],"integration":[217],"searching":[219],"pollution":[220,259],"smart":[223,230],"cities.":[224],"objectives":[228],"cities":[231],"reduce":[234],"pollution.":[235,247],"Resultant":[236],"crawled":[237],"used":[241],"finding":[243],"reason":[245],"help":[251],"user":[253],"level":[257],"area.":[263],"rate":[266,303],"compared":[271],"pioneer":[273],"existing":[274],"work.":[275],"With":[276],"increase":[278],"dataset,":[284],"presented":[286],"add":[289],"significant":[290],"value":[291],"emission":[293],"accuracy.":[294],"Our":[295],"demonstrating":[298],"accuracy":[300],"proposed":[306],"framework,":[307],"it":[309],"efficiently":[310],"collect":[311],"interfaces":[314],"from":[315],"large-scale":[316],"sites":[317],"higher":[320],"rates":[321],"than":[322],"other":[323],"crawlers.":[324]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":3}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
