{"id":"https://openalex.org/W3112131000","doi":"https://doi.org/10.1109/bigdata50022.2020.9377796","title":"Modeling Updates of Scholarly Webpages Using Archived Data","display_name":"Modeling Updates of Scholarly Webpages Using Archived Data","publication_year":2020,"publication_date":"2020-12-10","ids":{"openalex":"https://openalex.org/W3112131000","doi":"https://doi.org/10.1109/bigdata50022.2020.9377796","mag":"3112131000"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata50022.2020.9377796","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata50022.2020.9377796","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2012.03397","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5076257035","display_name":"Yasith Jayawardana","orcid":"https://orcid.org/0000-0001-5992-6818"},"institutions":[{"id":"https://openalex.org/I81365321","display_name":"Old Dominion University","ror":"https://ror.org/04zjtrb98","country_code":"US","type":"education","lineage":["https://openalex.org/I81365321"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yasith Jayawardana","raw_affiliation_strings":["Computer Science Department, Old Dominion University, Norfolk, VA, USA"],"affiliations":[{"raw_affiliation_string":"Computer Science Department, Old Dominion University, Norfolk, VA, USA","institution_ids":["https://openalex.org/I81365321"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077830996","display_name":"Alexander C. Nwala","orcid":"https://orcid.org/0000-0003-3408-791X"},"institutions":[{"id":"https://openalex.org/I4210119109","display_name":"Indiana University Bloomington","ror":"https://ror.org/02k40bc56","country_code":"US","type":"education","lineage":["https://openalex.org/I4210119109","https://openalex.org/I592451"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alexander C. Nwala","raw_affiliation_strings":["Center for Complex Networks and Systems Research Luddy School of Informatics, Computing, and Engineering, Indiana University, Bloomington, IN, USA"],"affiliations":[{"raw_affiliation_string":"Center for Complex Networks and Systems Research Luddy School of Informatics, Computing, and Engineering, Indiana University, Bloomington, IN, USA","institution_ids":["https://openalex.org/I4210119109"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081980322","display_name":"Gavindya Jayawardena","orcid":"https://orcid.org/0000-0002-9523-3346"},"institutions":[{"id":"https://openalex.org/I81365321","display_name":"Old Dominion University","ror":"https://ror.org/04zjtrb98","country_code":"US","type":"education","lineage":["https://openalex.org/I81365321"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gavindya Jayawardena","raw_affiliation_strings":["Computer Science Department, Old Dominion University, Norfolk, VA, USA"],"affiliations":[{"raw_affiliation_string":"Computer Science Department, Old Dominion University, Norfolk, VA, USA","institution_ids":["https://openalex.org/I81365321"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075242841","display_name":"Jian Wu","orcid":"https://orcid.org/0000-0003-0173-4463"},"institutions":[{"id":"https://openalex.org/I81365321","display_name":"Old Dominion University","ror":"https://ror.org/04zjtrb98","country_code":"US","type":"education","lineage":["https://openalex.org/I81365321"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jian Wu","raw_affiliation_strings":["Computer Science Department, Old Dominion University, Norfolk, VA, USA"],"affiliations":[{"raw_affiliation_string":"Computer Science Department, Old Dominion University, Norfolk, VA, USA","institution_ids":["https://openalex.org/I81365321"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048398447","display_name":"Sampath Jayarathna","orcid":"https://orcid.org/0000-0002-4879-7309"},"institutions":[{"id":"https://openalex.org/I81365321","display_name":"Old Dominion University","ror":"https://ror.org/04zjtrb98","country_code":"US","type":"education","lineage":["https://openalex.org/I81365321"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sampath Jayarathna","raw_affiliation_strings":["Computer Science Department, Old Dominion University, Norfolk, VA, USA"],"affiliations":[{"raw_affiliation_string":"Computer Science Department, Old Dominion University, Norfolk, VA, USA","institution_ids":["https://openalex.org/I81365321"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081811192","display_name":"Michael L. Nelson","orcid":"https://orcid.org/0000-0003-3749-8116"},"institutions":[{"id":"https://openalex.org/I81365321","display_name":"Old Dominion University","ror":"https://ror.org/04zjtrb98","country_code":"US","type":"education","lineage":["https://openalex.org/I81365321"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michael L. Nelson","raw_affiliation_strings":["Computer Science Department, Old Dominion University, Norfolk, VA, USA"],"affiliations":[{"raw_affiliation_string":"Computer Science Department, Old Dominion University, Norfolk, VA, USA","institution_ids":["https://openalex.org/I81365321"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001294898","display_name":"C. Lee Giles","orcid":"https://orcid.org/0000-0002-1931-585X"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"C. Lee Giles","raw_affiliation_strings":["Information Sciences & Technology, Pennsylvania State University, PA, USA"],"affiliations":[{"raw_affiliation_string":"Information Sciences & Technology, Pennsylvania State University, PA, USA","institution_ids":["https://openalex.org/I130769515"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5076257035"],"corresponding_institution_ids":["https://openalex.org/I81365321"],"apc_list":null,"apc_paid":null,"fwci":0.5563,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.77376312,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"1","issue":null,"first_page":"1868","last_page":"1877"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9726999998092651,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9700000286102295,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.818859875202179},{"id":"https://openalex.org/keywords/web-crawler","display_name":"Web crawler","score":0.7996182441711426},{"id":"https://openalex.org/keywords/crawling","display_name":"Crawling","score":0.7275989055633545},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.6933158040046692},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.582321286201477},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.5555958151817322},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.5373067259788513},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5076831579208374}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.818859875202179},{"id":"https://openalex.org/C13743948","wikidata":"https://www.wikidata.org/wiki/Q45842","display_name":"Web crawler","level":2,"score":0.7996182441711426},{"id":"https://openalex.org/C100368936","wikidata":"https://www.wikidata.org/wiki/Q1411725","display_name":"Crawling","level":2,"score":0.7275989055633545},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.6933158040046692},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.582321286201477},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.5555958151817322},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.5373067259788513},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5076831579208374},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C105702510","wikidata":"https://www.wikidata.org/wiki/Q514","display_name":"Anatomy","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/bigdata50022.2020.9377796","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata50022.2020.9377796","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2012.03397","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2012.03397","pdf_url":"https://arxiv.org/pdf/2012.03397","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2012.03397","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2012.03397","pdf_url":"https://arxiv.org/pdf/2012.03397","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"score":0.41999998688697815,"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W173995639","https://openalex.org/W1489992655","https://openalex.org/W1510484544","https://openalex.org/W1566984846","https://openalex.org/W1970841368","https://openalex.org/W1974757757","https://openalex.org/W1976624301","https://openalex.org/W1987272746","https://openalex.org/W2001832505","https://openalex.org/W2006119904","https://openalex.org/W2014766608","https://openalex.org/W2016122268","https://openalex.org/W2026632779","https://openalex.org/W2029341294","https://openalex.org/W2046941888","https://openalex.org/W2055950890","https://openalex.org/W2083089853","https://openalex.org/W2085600442","https://openalex.org/W2097411811","https://openalex.org/W2110073539","https://openalex.org/W2127536142","https://openalex.org/W2142747917","https://openalex.org/W2168190036","https://openalex.org/W2911388033","https://openalex.org/W2916572269","https://openalex.org/W2967836687","https://openalex.org/W2978352415","https://openalex.org/W3104829027","https://openalex.org/W4232464786","https://openalex.org/W4251434705","https://openalex.org/W6634031162","https://openalex.org/W6759769309"],"related_works":["https://openalex.org/W3119324922","https://openalex.org/W2352686120","https://openalex.org/W2372594123","https://openalex.org/W2358310581","https://openalex.org/W2964752624","https://openalex.org/W2026132847","https://openalex.org/W2137810919","https://openalex.org/W2089702591","https://openalex.org/W4255854114","https://openalex.org/W4385695127"],"abstract_inverted_index":{"The":[0],"vastness":[1],"of":[2,30,44,52,71,85,124,146,151,165,173],"the":[3,26,42,47,64,89,132,137,155,163,170],"web":[4,48,66,174],"imposes":[5],"a":[6,60,121,128,149],"prohibitive":[7],"cost":[8],"on":[9,63,159],"building":[10],"large-scale":[11],"search":[12],"engines":[13],"with":[14],"limited":[15],"resources.":[16],"Crawl":[17],"frontiers":[18],"thus":[19],"need":[20],"to":[21,24,105,154,168],"be":[22],"optimized":[23],"improve":[25],"coverage":[27],"and":[28,93,139,176],"freshness":[29],"crawled":[31],"content.":[32],"In":[33],"this":[34],"paper,":[35],"we":[36,58,101,161],"propose":[37],"an":[38],"approach":[39],"for":[40,131],"modeling":[41],"dynamics":[43],"change":[45],"in":[46,136],"using":[49,67],"archived":[50,83,125,166],"copies":[51,84],"webpages.":[53],"To":[54],"evaluate":[55],"its":[56],"utility,":[57],"conduct":[59],"preliminary":[61],"study":[62],"scholarly":[65],"19,977":[68],"seed":[69],"URLs":[70],"authors\u2019":[72],"homepages":[73],"obtained":[74],"from":[75,88,120],"their":[76,96,107],"Google":[77],"Scholar":[78],"profiles.":[79],"We":[80],"first":[81],"obtain":[82],"these":[86],"webpages":[87],"Internet":[90],"Archive":[91],"(IA),":[92],"estimate":[94,106,130],"when":[95],"actual":[97],"updates":[98,147],"occurred.":[99],"Next,":[100],"apply":[102],"maximum":[103],"likelihood":[104],"mean":[108],"update":[109,134],"frequency":[110,135],"(\u03bb)":[111],"values.":[112],"Our":[113],"evaluation":[114],"shows":[115],"that":[116,140,180],"\u03bb":[117],"values":[118],"derived":[119],"short":[122],"history":[123],"data":[126,167],"provide":[127],"good":[129],"true":[133],"short-term,":[138],"our":[141],"method":[142],"provides":[143],"better":[144],"estimations":[145],"at":[148],"fraction":[150],"resources":[152],"compared":[153],"baseline":[156],"models.":[157],"Based":[158],"this,":[160],"demonstrate":[162],"utility":[164],"optimize":[169],"crawling":[171],"strategy":[172],"crawlers,":[175],"uncover":[177],"important":[178],"challenges":[179],"inspire":[181],"future":[182],"research":[183],"directions.":[184]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
