{"id":"https://openalex.org/W2142053489","doi":"https://doi.org/10.14778/1687627.1687757","title":"NEAR-Miner","display_name":"NEAR-Miner","publication_year":2009,"publication_date":"2009-08-01","ids":{"openalex":"https://openalex.org/W2142053489","doi":"https://doi.org/10.14778/1687627.1687757","mag":"2142053489"},"language":"en","primary_location":{"id":"doi:10.14778/1687627.1687757","is_oa":false,"landing_page_url":"https://doi.org/10.14778/1687627.1687757","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5069988750","display_name":"Ling Chen","orcid":"https://orcid.org/0000-0002-6468-5729"},"institutions":[{"id":"https://openalex.org/I4210136150","display_name":"L3S Research Center","ror":"https://ror.org/039t4wk02","country_code":"DE","type":"facility","lineage":["https://openalex.org/I114112103","https://openalex.org/I4210136150","https://openalex.org/I94509681"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Ling Chen","raw_affiliation_strings":["L3S/University of Hannover, Hannover, Germany"],"affiliations":[{"raw_affiliation_string":"L3S/University of Hannover, Hannover, Germany","institution_ids":["https://openalex.org/I4210136150"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061002947","display_name":"Sourav S. Bhowmick","orcid":"https://orcid.org/0000-0003-1957-8016"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Sourav S. Bhowmick","raw_affiliation_strings":["Nanyang Technological University, Singapore"],"affiliations":[{"raw_affiliation_string":"Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5074427964","display_name":"Wolfgang Nejdl","orcid":"https://orcid.org/0000-0003-3374-2193"},"institutions":[{"id":"https://openalex.org/I4210136150","display_name":"L3S Research Center","ror":"https://ror.org/039t4wk02","country_code":"DE","type":"facility","lineage":["https://openalex.org/I114112103","https://openalex.org/I4210136150","https://openalex.org/I94509681"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Wolfgang Nejdl","raw_affiliation_strings":["L3S/University of Hannover, Hannover, Germany"],"affiliations":[{"raw_affiliation_string":"L3S/University of Hannover, Hannover, Germany","institution_ids":["https://openalex.org/I4210136150"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5069988750"],"corresponding_institution_ids":["https://openalex.org/I4210136150"],"apc_list":null,"apc_paid":null,"fwci":0.7403,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.83706511,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"2","issue":"1","first_page":"1150","last_page":"1161"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9965999722480774,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/directory","display_name":"Directory","score":0.7496603727340698},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7428626418113708},{"id":"https://openalex.org/keywords/upload","display_name":"Upload","score":0.7099814414978027},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.6629641652107239},{"id":"https://openalex.org/keywords/web-crawler","display_name":"Web crawler","score":0.5953978896141052},{"id":"https://openalex.org/keywords/web-mining","display_name":"Web mining","score":0.5753070116043091},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.5718590617179871},{"id":"https://openalex.org/keywords/static-web-page","display_name":"Static web page","score":0.4717738628387451},{"id":"https://openalex.org/keywords/data-web","display_name":"Data Web","score":0.43421271443367004},{"id":"https://openalex.org/keywords/web-site","display_name":"Web site","score":0.42022383213043213},{"id":"https://openalex.org/keywords/web-navigation","display_name":"Web navigation","score":0.39168429374694824},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.36071687936782837},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.33971530199050903},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.16537317633628845},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.08238786458969116}],"concepts":[{"id":"https://openalex.org/C2777683733","wikidata":"https://www.wikidata.org/wiki/Q201456","display_name":"Directory","level":2,"score":0.7496603727340698},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7428626418113708},{"id":"https://openalex.org/C71901391","wikidata":"https://www.wikidata.org/wiki/Q7126699","display_name":"Upload","level":2,"score":0.7099814414978027},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.6629641652107239},{"id":"https://openalex.org/C13743948","wikidata":"https://www.wikidata.org/wiki/Q45842","display_name":"Web crawler","level":2,"score":0.5953978896141052},{"id":"https://openalex.org/C197046077","wikidata":"https://www.wikidata.org/wiki/Q785337","display_name":"Web mining","level":3,"score":0.5753070116043091},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.5718590617179871},{"id":"https://openalex.org/C173576120","wikidata":"https://www.wikidata.org/wiki/Q2641220","display_name":"Static web page","level":4,"score":0.4717738628387451},{"id":"https://openalex.org/C162005631","wikidata":"https://www.wikidata.org/wiki/Q54837","display_name":"Data Web","level":3,"score":0.43421271443367004},{"id":"https://openalex.org/C2984519610","wikidata":"https://www.wikidata.org/wiki/Q35127","display_name":"Web site","level":3,"score":0.42022383213043213},{"id":"https://openalex.org/C61096286","wikidata":"https://www.wikidata.org/wiki/Q7978592","display_name":"Web navigation","level":3,"score":0.39168429374694824},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.36071687936782837},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.33971530199050903},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.16537317633628845},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.08238786458969116}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.14778/1687627.1687757","is_oa":false,"landing_page_url":"https://doi.org/10.14778/1687627.1687757","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W1503924817","https://openalex.org/W1539286053","https://openalex.org/W1559144499","https://openalex.org/W1565377632","https://openalex.org/W1566984846","https://openalex.org/W1570573463","https://openalex.org/W1571433409","https://openalex.org/W1581468894","https://openalex.org/W1589386793","https://openalex.org/W1938740620","https://openalex.org/W1976624301","https://openalex.org/W2018928332","https://openalex.org/W2029341294","https://openalex.org/W2038378248","https://openalex.org/W2050921907","https://openalex.org/W2085922539","https://openalex.org/W2101257087","https://openalex.org/W2107031757","https://openalex.org/W2127536142","https://openalex.org/W2143275903","https://openalex.org/W2145990704","https://openalex.org/W2149425467","https://openalex.org/W2151932833","https://openalex.org/W2152593687","https://openalex.org/W2157748587","https://openalex.org/W2162502927","https://openalex.org/W2171070229","https://openalex.org/W2210278139","https://openalex.org/W2913520302","https://openalex.org/W3204318296","https://openalex.org/W4205774955","https://openalex.org/W4249960090","https://openalex.org/W4300870773","https://openalex.org/W6632181570","https://openalex.org/W6633894697","https://openalex.org/W6657826343","https://openalex.org/W6676028428"],"related_works":["https://openalex.org/W2554450732","https://openalex.org/W2586225588","https://openalex.org/W2318718760","https://openalex.org/W2053468713","https://openalex.org/W2240248956","https://openalex.org/W2605189665","https://openalex.org/W1579868569","https://openalex.org/W2527456778","https://openalex.org/W3022887243","https://openalex.org/W2142053489"],"abstract_inverted_index":{"Web":[0,7,25,36,48,72,146,152,167,176,186],"archives":[1],"preserve":[2],"the":[3,32,98,115,125,142,150,156,172,179,195,198,222,225,235,238,254],"history":[4,144],"of":[5,17,34,42,92,145,149,224,237],"autonomous":[6],"sites":[8],"and":[9,19,107,158],"are":[10,75,82,94,202],"potential":[11],"gold":[12],"mines":[13,159],"for":[14,69,261],"all":[15],"kinds":[16],"media":[18],"business":[20],"analysts.":[21],"The":[22],"most":[23],"common":[24],"archiving":[26],"technique":[27],"uses":[28],"crawlers":[29],"to":[30,84,105,249],"automate":[31],"process":[33,228],"collecting":[35],"pages.":[37],"However,":[38],"(re)downloading":[39,71],"entire":[40],"collection":[41],"pages":[43,73,93,109],"periodically":[44],"from":[45,97,124],"a":[46,57,65,89],"large":[47],"site":[49,153],"is":[50,102,246],"unfeasible.":[51],"In":[52,127],"this":[53,61],"paper,":[54],"we":[55,130,182],"take":[56],"step":[58],"towards":[59],"addressing":[60],"problem.":[62],"We":[63],"devise":[64],"data":[66,134,216],"mining-driven":[67],"policy":[68],"selectively":[70],"that":[74,110,140,192,218,244],"located":[76],"in":[77,155,207,233],"hierarchical":[78],"directory":[79,147],"structures":[80,148],"which":[81,201],"believed":[83],"have":[85,111],"changed":[86,113],"significantly":[87,229],"(e.g.,":[88],"substantial":[90],"percentage":[91],"inserted":[95],"to/removed":[96],"directory).":[99],"Consequently,":[100],"there":[101],"no":[103],"need":[104],"download":[106],"maintain":[108],"not":[112,247],"since":[114],"last":[116],"crawl":[117],"as":[118,253],"they":[119],"can":[120,257],"be":[121,258],"easily":[122],"retrieved":[123],"archive.":[126],"our":[128,219,241],"approach,":[129],"propose":[131,183],"an":[132,184],"off-line":[133],"mining":[135,255],"algorithm":[136,189],"called":[137,190],"near-":[138],"Miner":[139],"analyzes":[141],"evolution":[143,173],"original":[151],"stored":[154],"archive":[157,187,226,262],"negatively":[160,203],"correlated":[161,204],"association":[162],"rules":[163,170,256],"(near)":[164],"between":[165,175],"ancestor-descendant":[166],"directories.":[168,177],"These":[169],"indicate":[171],"correlations":[174],"Using":[178],"discovered":[180],"rules,":[181],"efficient":[185],"maintenance":[188,227,263],"warm":[191],"optimally":[193],"skips":[194],"subdirectories":[196],"(during":[197],"next":[199],"crawl)":[200],"with":[205,214],"it":[206,245],"undergoing":[208],"significant":[209],"changes.":[210],"Our":[211],"experimental":[212],"results":[213],"real":[215],"show":[217],"approach":[220],"improves":[221],"efficiency":[223],"while":[230],"sacrificing":[231],"slightly":[232],"keeping":[234],"\"freshness\"":[236],"archives.":[239],"Furthermore,":[240],"experiments":[242],"demonstrate":[243],"necessary":[248],"discover":[250],"nears":[251],"frequently":[252],"utilized":[259],"effectively":[260],"over":[264],"multiple":[265],"versions.":[266]},"counts_by_year":[{"year":2013,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2016-06-24T00:00:00"}
