{"id":"https://openalex.org/W7143350553","doi":"https://doi.org/10.1016/j.datak.2026.102598","title":"A systematic review of web scraping: Techniques, LLM-enhanced approaches, performance metrics, and legal\u2013ethical issues","display_name":"A systematic review of web scraping: Techniques, LLM-enhanced approaches, performance metrics, and legal\u2013ethical issues","publication_year":2026,"publication_date":"2026-03-30","ids":{"openalex":"https://openalex.org/W7143350553","doi":"https://doi.org/10.1016/j.datak.2026.102598"},"language":"en","primary_location":{"id":"doi:10.1016/j.datak.2026.102598","is_oa":false,"landing_page_url":"https://doi.org/10.1016/j.datak.2026.102598","pdf_url":null,"source":{"id":"https://openalex.org/S136993123","display_name":"Data & Knowledge Engineering","issn_l":"0169-023X","issn":["0169-023X","1872-6933"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data &amp; Knowledge Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5026335311","display_name":"Navroz Kaur Kahlon","orcid":"https://orcid.org/0000-0003-4209-727X"},"institutions":[{"id":"https://openalex.org/I79161377","display_name":"Punjabi University","ror":"https://ror.org/00xdn8y92","country_code":"IN","type":"education","lineage":["https://openalex.org/I79161377"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Navroz Kaur Kahlon","raw_affiliation_strings":["Department of Computer Science and Engineering, Punjabi University, Patiala, 147002, Punjab, India"],"raw_orcid":"https://orcid.org/0000-0003-4209-727X","affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Punjabi University, Patiala, 147002, Punjab, India","institution_ids":["https://openalex.org/I79161377"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033781584","display_name":"Williamjeet Singh","orcid":"https://orcid.org/0000-0002-7763-9174"},"institutions":[{"id":"https://openalex.org/I79161377","display_name":"Punjabi University","ror":"https://ror.org/00xdn8y92","country_code":"IN","type":"education","lineage":["https://openalex.org/I79161377"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Williamjeet Singh","raw_affiliation_strings":["Department of Computer Science and Engineering, Punjabi University, Patiala, 147002, Punjab, India"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Punjabi University, Patiala, 147002, Punjab, India","institution_ids":["https://openalex.org/I79161377"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5026335311"],"corresponding_institution_ids":["https://openalex.org/I79161377"],"apc_list":{"value":2590,"currency":"USD","value_usd":2590},"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.69528112,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"164","issue":null,"first_page":"102598","last_page":"102598"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.0697999969124794,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.0697999969124794,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12479","display_name":"Web Application Security Vulnerabilities","score":0.05550000071525574,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12519","display_name":"Cybercrime and Law Enforcement Studies","score":0.03909999877214432,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.32899999618530273},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.28949999809265137},{"id":"https://openalex.org/keywords/web-application","display_name":"Web application","score":0.27149999141693115},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.2498999983072281}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.527899980545044},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.41019999980926514},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.32899999618530273},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.3147999942302704},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3100000023841858},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.28949999809265137},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.28600001335144043},{"id":"https://openalex.org/C118643609","wikidata":"https://www.wikidata.org/wiki/Q189210","display_name":"Web application","level":2,"score":0.27149999141693115},{"id":"https://openalex.org/C108827166","wikidata":"https://www.wikidata.org/wiki/Q175975","display_name":"Internet privacy","level":1,"score":0.2619999945163727},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.2498999983072281}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1016/j.datak.2026.102598","is_oa":false,"landing_page_url":"https://doi.org/10.1016/j.datak.2026.102598","pdf_url":null,"source":{"id":"https://openalex.org/S136993123","display_name":"Data & Knowledge Engineering","issn_l":"0169-023X","issn":["0169-023X","1872-6933"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data &amp; Knowledge Engineering","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":172,"referenced_works":["https://openalex.org/W197097210","https://openalex.org/W1501343905","https://openalex.org/W1523479877","https://openalex.org/W1527650713","https://openalex.org/W1529013893","https://openalex.org/W1927671444","https://openalex.org/W1965725152","https://openalex.org/W1968016146","https://openalex.org/W1987515118","https://openalex.org/W1991795051","https://openalex.org/W1999361961","https://openalex.org/W2008046805","https://openalex.org/W2008525336","https://openalex.org/W2020366046","https://openalex.org/W2040757233","https://openalex.org/W2041190309","https://openalex.org/W2045986835","https://openalex.org/W2048001624","https://openalex.org/W2058294751","https://openalex.org/W2071628657","https://openalex.org/W2073985982","https://openalex.org/W2076470289","https://openalex.org/W2084577425","https://openalex.org/W2094591158","https://openalex.org/W2106568316","https://openalex.org/W2115789403","https://openalex.org/W2118131693","https://openalex.org/W2123354009","https://openalex.org/W2126056371","https://openalex.org/W2133669904","https://openalex.org/W2134150392","https://openalex.org/W2134172329","https://openalex.org/W2148317291","https://openalex.org/W2160189941","https://openalex.org/W2168196213","https://openalex.org/W2238905369","https://openalex.org/W2264422083","https://openalex.org/W2284698607","https://openalex.org/W2296164608","https://openalex.org/W2302373087","https://openalex.org/W2326782115","https://openalex.org/W2333677440","https://openalex.org/W2336882399","https://openalex.org/W2346366194","https://openalex.org/W2346655396","https://openalex.org/W2417750249","https://openalex.org/W2421110331","https://openalex.org/W2461816915","https://openalex.org/W2509609884","https://openalex.org/W2516930597","https://openalex.org/W2557461283","https://openalex.org/W2581267621","https://openalex.org/W2588069262","https://openalex.org/W2606117036","https://openalex.org/W2607053277","https://openalex.org/W2607289391","https://openalex.org/W2607468485","https://openalex.org/W2704544833","https://openalex.org/W2728446593","https://openalex.org/W2740650844","https://openalex.org/W2754381541","https://openalex.org/W2766678031","https://openalex.org/W2767511751","https://openalex.org/W2770617382","https://openalex.org/W2783405072","https://openalex.org/W2788996824","https://openalex.org/W2789263027","https://openalex.org/W2789458942","https://openalex.org/W2789893667","https://openalex.org/W2790257300","https://openalex.org/W2791541123","https://openalex.org/W2793754788","https://openalex.org/W2801802266","https://openalex.org/W2804974507","https://openalex.org/W2806312433","https://openalex.org/W2807776514","https://openalex.org/W2807932277","https://openalex.org/W2808349081","https://openalex.org/W2884613865","https://openalex.org/W2898162108","https://openalex.org/W2898441677","https://openalex.org/W2905776180","https://openalex.org/W2907425647","https://openalex.org/W2908060726","https://openalex.org/W2909532859","https://openalex.org/W2911701110","https://openalex.org/W2915036452","https://openalex.org/W2921139906","https://openalex.org/W2921570248","https://openalex.org/W2934018098","https://openalex.org/W2934414324","https://openalex.org/W2936430649","https://openalex.org/W2939524192","https://openalex.org/W2943196664","https://openalex.org/W2946148891","https://openalex.org/W2950793774","https://openalex.org/W2951973051","https://openalex.org/W2954057334","https://openalex.org/W2954722142","https://openalex.org/W2955772300","https://openalex.org/W2955880496","https://openalex.org/W2961921276","https://openalex.org/W2962578095","https://openalex.org/W2962954046","https://openalex.org/W2963965575","https://openalex.org/W2966345143","https://openalex.org/W2969316793","https://openalex.org/W2972477733","https://openalex.org/W2977837933","https://openalex.org/W2981630306","https://openalex.org/W2983006427","https://openalex.org/W2990191990","https://openalex.org/W2990921577","https://openalex.org/W2993097741","https://openalex.org/W2993733558","https://openalex.org/W2995331640","https://openalex.org/W3002216803","https://openalex.org/W3003341304","https://openalex.org/W3004690350","https://openalex.org/W3005146403","https://openalex.org/W3005454478","https://openalex.org/W3006558684","https://openalex.org/W3008479101","https://openalex.org/W3009542218","https://openalex.org/W3009595334","https://openalex.org/W3011650416","https://openalex.org/W3011936643","https://openalex.org/W3014183340","https://openalex.org/W3021378734","https://openalex.org/W3023758807","https://openalex.org/W3024829012","https://openalex.org/W3033247292","https://openalex.org/W3034495099","https://openalex.org/W3034951048","https://openalex.org/W3034965111","https://openalex.org/W3041819425","https://openalex.org/W3042512761","https://openalex.org/W3046140999","https://openalex.org/W3046644316","https://openalex.org/W3049111883","https://openalex.org/W3049549732","https://openalex.org/W3082088464","https://openalex.org/W3082652313","https://openalex.org/W3093147169","https://openalex.org/W3094232439","https://openalex.org/W3094373425","https://openalex.org/W3094515431","https://openalex.org/W3094519163","https://openalex.org/W3099363381","https://openalex.org/W3100573046","https://openalex.org/W3102847274","https://openalex.org/W3104726005","https://openalex.org/W3105015692","https://openalex.org/W3106511314","https://openalex.org/W3106946708","https://openalex.org/W3108009838","https://openalex.org/W3108211699","https://openalex.org/W3108546602","https://openalex.org/W3108856816","https://openalex.org/W3112176637","https://openalex.org/W3118427809","https://openalex.org/W3127714742","https://openalex.org/W3132635576","https://openalex.org/W3155150319","https://openalex.org/W3162588581","https://openalex.org/W3168668302","https://openalex.org/W3171055073","https://openalex.org/W3171690063","https://openalex.org/W3171876577","https://openalex.org/W3194274583","https://openalex.org/W4200606070","https://openalex.org/W4255952900"],"related_works":[],"abstract_inverted_index":null,"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-03-31T00:00:00"}
