{"id":"https://openalex.org/W3094573539","doi":"https://doi.org/10.1109/icccnt49239.2020.9225369","title":"Smart Crawler for Harvesting Deep web with Multi-Classification","display_name":"Smart Crawler for Harvesting Deep web with Multi-Classification","publication_year":2020,"publication_date":"2020-07-01","ids":{"openalex":"https://openalex.org/W3094573539","doi":"https://doi.org/10.1109/icccnt49239.2020.9225369","mag":"3094573539"},"language":"en","primary_location":{"id":"doi:10.1109/icccnt49239.2020.9225369","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icccnt49239.2020.9225369","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 11th International Conference on Computing, Communication and Networking Technologies (ICCCNT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090822865","display_name":"Ajay Khare","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ajay Khare","raw_affiliation_strings":["VJTI, Mumbai, India"],"affiliations":[{"raw_affiliation_string":"VJTI, Mumbai, India","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081794824","display_name":"Ashwini Dalvi","orcid":"https://orcid.org/0000-0001-9015-457X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ashwini Dalvi","raw_affiliation_strings":["VJTI, Mumbai, India"],"affiliations":[{"raw_affiliation_string":"VJTI, Mumbai, India","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5000629645","display_name":"Faruk Kazi","orcid":"https://orcid.org/0000-0002-6551-3021"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Faruk Kazi","raw_affiliation_strings":["VJTI, Mumbai, India"],"affiliations":[{"raw_affiliation_string":"VJTI, Mumbai, India","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5090822865"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.8683,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.89456664,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9922999739646912,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/web-crawler","display_name":"Web crawler","score":0.9172415137290955},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7711612582206726},{"id":"https://openalex.org/keywords/hyperlink","display_name":"Hyperlink","score":0.651896059513092},{"id":"https://openalex.org/keywords/naive-bayes-classifier","display_name":"Naive Bayes classifier","score":0.587242603302002},{"id":"https://openalex.org/keywords/focused-crawler","display_name":"Focused crawler","score":0.577450692653656},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.5700728297233582},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.5581134557723999},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.5294798612594604},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5031968951225281},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4932798743247986},{"id":"https://openalex.org/keywords/crawling","display_name":"Crawling","score":0.47569096088409424},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.4753824472427368},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.4450737237930298},{"id":"https://openalex.org/keywords/deep-web","display_name":"Deep Web","score":0.4436423182487488},{"id":"https://openalex.org/keywords/web-navigation","display_name":"Web navigation","score":0.3880349397659302},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3865341246128082},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3637782335281372},{"id":"https://openalex.org/keywords/static-web-page","display_name":"Static web page","score":0.3087814450263977}],"concepts":[{"id":"https://openalex.org/C13743948","wikidata":"https://www.wikidata.org/wiki/Q45842","display_name":"Web crawler","level":2,"score":0.9172415137290955},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7711612582206726},{"id":"https://openalex.org/C30088001","wikidata":"https://www.wikidata.org/wiki/Q102014","display_name":"Hyperlink","level":3,"score":0.651896059513092},{"id":"https://openalex.org/C52001869","wikidata":"https://www.wikidata.org/wiki/Q812530","display_name":"Naive Bayes classifier","level":3,"score":0.587242603302002},{"id":"https://openalex.org/C73340581","wikidata":"https://www.wikidata.org/wiki/Q5463958","display_name":"Focused crawler","level":5,"score":0.577450692653656},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.5700728297233582},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.5581134557723999},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.5294798612594604},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5031968951225281},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4932798743247986},{"id":"https://openalex.org/C100368936","wikidata":"https://www.wikidata.org/wiki/Q1411725","display_name":"Crawling","level":2,"score":0.47569096088409424},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.4753824472427368},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.4450737237930298},{"id":"https://openalex.org/C46721378","wikidata":"https://www.wikidata.org/wiki/Q221989","display_name":"Deep Web","level":3,"score":0.4436423182487488},{"id":"https://openalex.org/C61096286","wikidata":"https://www.wikidata.org/wiki/Q7978592","display_name":"Web navigation","level":3,"score":0.3880349397659302},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3865341246128082},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3637782335281372},{"id":"https://openalex.org/C173576120","wikidata":"https://www.wikidata.org/wiki/Q2641220","display_name":"Static web page","level":4,"score":0.3087814450263977},{"id":"https://openalex.org/C105702510","wikidata":"https://www.wikidata.org/wiki/Q514","display_name":"Anatomy","level":1,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icccnt49239.2020.9225369","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icccnt49239.2020.9225369","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 11th International Conference on Computing, Communication and Networking Technologies (ICCCNT)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W291262356","https://openalex.org/W2061809273","https://openalex.org/W2094930182","https://openalex.org/W2130760429","https://openalex.org/W2170971772","https://openalex.org/W2214737927","https://openalex.org/W2326804586","https://openalex.org/W2417549947","https://openalex.org/W2610357063","https://openalex.org/W2729595233","https://openalex.org/W2782709738","https://openalex.org/W2811483103","https://openalex.org/W2890600430","https://openalex.org/W2911388033","https://openalex.org/W2944304626","https://openalex.org/W2957728461","https://openalex.org/W4211152208","https://openalex.org/W4236259340","https://openalex.org/W6754293403"],"related_works":["https://openalex.org/W2375180657","https://openalex.org/W4385695127","https://openalex.org/W1506122440","https://openalex.org/W2042034567","https://openalex.org/W2026132847","https://openalex.org/W2358310581","https://openalex.org/W2019080882","https://openalex.org/W2384770049","https://openalex.org/W2371725684","https://openalex.org/W3184910030"],"abstract_inverted_index":{"In":[0,93],"recent":[1],"era":[2],"data":[3,20,198,205],"available":[4],"on":[5,126,145],"the":[6,24,42,55,80,104,118,129],"internet":[7],"is":[8,21,36,89,210],"playing":[9],"a":[10,60,98,201,214],"vital":[11],"role.":[12],"According":[13],"to":[14,31,40,90,138,147,231],"research,":[15],"most":[16],"precious":[17],"and":[18,66,87,107,124,141,161,171,186,207,225],"valuable":[19],"present":[22],"in":[23,29,204,213],"deep":[25,43,56,64,73,105],"web":[26,35,44,65,74,106],"so":[27],"interest":[28],"techniques":[30,179],"efficiently":[32,102],"site":[33,156],"invisible":[34],"increasing.":[37],"The":[38,133,151],"challenges":[39],"extract":[41],"are":[45,192],"requirement":[46],"of":[47,50,54,59,63,69,85,121,149,173],"huge":[48,202],"volume":[49],"resources,":[51],"dynamic":[52],"nature":[53],"web,":[57],"coverage":[58],"wider":[61],"area":[62],"higher":[67,228],"efficiency":[68],"collected":[70],"results":[71,219],"from":[72,117,194],"with":[75,78,175,197,220],"accuracy.":[76],"Along":[77],"all":[79,208],"above":[81],"challenges,":[82],"user":[83],"demand":[84],"privacy":[86],"identity":[88],"be":[91],"maintained.":[92],"this":[94,209],"paper":[95],"we":[96],"proposed":[97],"smart":[99,113],"crawler":[100,114,134,152],"that":[101],"searches":[103],"avoids":[108],"visiting":[109],"irrelevant":[110],"pages.":[111],"A":[112],"starts":[115],"crawling":[116,127],"center":[119],"page":[120],"seed":[122],"URL":[123],"goes":[125],"till":[128],"last":[130],"link":[131,222],"available.":[132],"has":[135],"an":[136],"ability":[137],"separate":[139],"active":[140],"inactive":[142],"links":[143],"based":[144],"requests":[146],"sever":[148],"hyperlink.":[150],"also":[153],"contains":[154],"text-based":[155],"classifier":[157],"using":[158],"neural":[159],"network":[160],"natural":[162],"language":[163],"processing":[164],"as":[165,180],"Term":[166],"Frequency":[167,170],"Inverse":[168],"Document":[169],"Bag":[172],"Words":[174],"supervised":[176],"machine":[177,185],"learning":[178],"logistic":[181],"regression,":[182],"support":[183],"vector":[184],"naive":[187],"bayes.":[188],"Also":[189],"HTML":[190],"tags":[191],"extracted":[193],"hyperlinks":[195],"along":[196],"which":[199],"plays":[200],"role":[203],"analysis":[206],"separately":[211],"saved":[212],"centralized":[215],"database.":[216],"Our":[217],"experimental":[218],"efficient":[221],"reaping":[223],"rate":[224],"classification":[226],"show":[227],"accuracy":[229],"compared":[230],"different":[232],"crawlers.":[233]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
