{"id":"https://openalex.org/W4406495706","doi":"https://doi.org/10.1109/bigdata62323.2024.10826069","title":"Exploring Large Language Models for Analyzing Changes in Web Archive Content: A Retrieval-Augmented Generation Approach","display_name":"Exploring Large Language Models for Analyzing Changes in Web Archive Content: A Retrieval-Augmented Generation Approach","publication_year":2024,"publication_date":"2024-12-15","ids":{"openalex":"https://openalex.org/W4406495706","doi":"https://doi.org/10.1109/bigdata62323.2024.10826069"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata62323.2024.10826069","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10826069","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5106368503","display_name":"Jhon G. Botello","orcid":"https://orcid.org/0009-0009-9344-4404"},"institutions":[{"id":"https://openalex.org/I81365321","display_name":"Old Dominion University","ror":"https://ror.org/04zjtrb98","country_code":"US","type":"education","lineage":["https://openalex.org/I81365321"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jhon G. Botello","raw_affiliation_strings":["Old Dominion University,Department of Computer Science,Norfolk,Virginia,USA"],"affiliations":[{"raw_affiliation_string":"Old Dominion University,Department of Computer Science,Norfolk,Virginia,USA","institution_ids":["https://openalex.org/I81365321"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060646998","display_name":"Lesley Frew","orcid":"https://orcid.org/0000-0003-0929-049X"},"institutions":[{"id":"https://openalex.org/I81365321","display_name":"Old Dominion University","ror":"https://ror.org/04zjtrb98","country_code":"US","type":"education","lineage":["https://openalex.org/I81365321"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lesley Frew","raw_affiliation_strings":["Old Dominion University,Department of Computer Science,Norfolk,Virginia,USA"],"affiliations":[{"raw_affiliation_string":"Old Dominion University,Department of Computer Science,Norfolk,Virginia,USA","institution_ids":["https://openalex.org/I81365321"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012136427","display_name":"Jos\u00e9 J. Padilla","orcid":"https://orcid.org/0000-0003-0720-4148"},"institutions":[{"id":"https://openalex.org/I81365321","display_name":"Old Dominion University","ror":"https://ror.org/04zjtrb98","country_code":"US","type":"education","lineage":["https://openalex.org/I81365321"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jose J. Padilla","raw_affiliation_strings":["Virginia Modeling, Analysis, and Simulation Center Old Dominion University,Norfolk,Virginia,USA"],"affiliations":[{"raw_affiliation_string":"Virginia Modeling, Analysis, and Simulation Center Old Dominion University,Norfolk,Virginia,USA","institution_ids":["https://openalex.org/I81365321"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5085719625","display_name":"Michele C. Weigle","orcid":"https://orcid.org/0000-0002-2787-7166"},"institutions":[{"id":"https://openalex.org/I81365321","display_name":"Old Dominion University","ror":"https://ror.org/04zjtrb98","country_code":"US","type":"education","lineage":["https://openalex.org/I81365321"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michele C. Weigle","raw_affiliation_strings":["Old Dominion University,Department of Computer Science,Norfolk,Virginia,USA"],"affiliations":[{"raw_affiliation_string":"Old Dominion University,Department of Computer Science,Norfolk,Virginia,USA","institution_ids":["https://openalex.org/I81365321"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5106368503"],"corresponding_institution_ids":["https://openalex.org/I81365321"],"apc_list":null,"apc_paid":null,"fwci":0.8142,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.8290282,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"2410","last_page":"2418"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9837999939918518,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9815000295639038,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7959599494934082},{"id":"https://openalex.org/keywords/content","display_name":"Content (measure theory)","score":0.6204681992530823},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5953443646430969},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.47726577520370483},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3994082808494568}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7959599494934082},{"id":"https://openalex.org/C2778152352","wikidata":"https://www.wikidata.org/wiki/Q5165061","display_name":"Content (measure theory)","level":2,"score":0.6204681992530823},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5953443646430969},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.47726577520370483},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3994082808494568},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata62323.2024.10826069","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10826069","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6200000047683716,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W3035869065","https://openalex.org/W3130148376","https://openalex.org/W4200411179","https://openalex.org/W4236875673","https://openalex.org/W4310923309","https://openalex.org/W4379598302","https://openalex.org/W4381253618","https://openalex.org/W4387299695","https://openalex.org/W4391045944","https://openalex.org/W4391855109","https://openalex.org/W4405703871","https://openalex.org/W6779864429","https://openalex.org/W6847303613","https://openalex.org/W6853766485","https://openalex.org/W6860949407"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Websites":[0],"typically":[1],"display":[2],"only":[3],"their":[4],"most":[5],"recent":[6],"content.":[7],"However,":[8],"the":[9,13,58,132,135,140],"dynamic":[10],"nature":[11],"of":[12,25,60,101,131],"web":[14,78,105,126,158],"leads":[15],"to":[16,85,122],"frequent":[17],"updates":[18],"and":[19,94,111,134,156],"deletions.":[20],"Web":[21,88],"archives":[22],"preserve":[23],"snapshots":[24],"earlier":[26],"versions":[27],"for":[28,73,154],"those":[29],"interested":[30],"in":[31,76,125],"tracking":[32],"changes":[33,38,75,96,141],"over":[34],"time.":[35],"Analyzing":[36],"these":[37],"often":[39],"requires":[40],"a":[41,68,82,98],"manual":[42],"process":[43],"that":[44,107,116],"relies":[45],"on":[46,50],"traditional":[47],"methods":[48],"focused":[49],"terms":[51],"or":[52],"phrase-level":[53],"differences.":[54],"This":[55],"study":[56],"explores":[57],"capability":[59],"Large":[61],"Language":[62],"Models":[63],"(LLMs),":[64],"specifically":[65],"GPT-4o,":[66],"through":[67],"Retrieval-Augmented":[69],"Generation":[70],"(RAG)":[71],"approach":[72],"detecting":[74],"archived":[77],"pages.":[79],"Using":[80],"WARC-GPT,":[81],"RAG":[83],"pipeline":[84],"interact":[86],"with":[87],"ARChive":[89],"(WARC)":[90],"files,":[91],"we":[92],"identify":[93],"analyze":[95],"across":[97],"small":[99],"set":[100],"U.S.":[102],"federal":[103],"environmental":[104],"pages":[106],"changed":[108],"between":[109],"2016":[110],"2020.":[112],"Our":[113,143],"findings":[114],"show":[115],"GPT-4o":[117],"can":[118],"effectively":[119],"be":[120],"used":[121],"detect":[123],"inconsistencies":[124],"archive":[127],"content,":[128],"including":[129],"consideration":[130],"change":[133,159],"semantic":[136],"context":[137],"upon":[138],"which":[139],"occurred.":[142],"exploration":[144],"represents":[145],"an":[146],"initial":[147],"step":[148],"toward":[149],"using":[150],"Artificial":[151],"Intelligence":[152],"(AI)":[153],"deeper":[155],"scalable":[157],"analysis.":[160]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
