{"id":"https://openalex.org/W4416422179","doi":"https://doi.org/10.1177/20539517251381686","title":"Web scraping for research: Legal, ethical, institutional, and scientific considerations","display_name":"Web scraping for research: Legal, ethical, institutional, and scientific considerations","publication_year":2025,"publication_date":"2025-11-20","ids":{"openalex":"https://openalex.org/W4416422179","doi":"https://doi.org/10.1177/20539517251381686"},"language":"en","primary_location":{"id":"doi:10.1177/20539517251381686","is_oa":true,"landing_page_url":"https://doi.org/10.1177/20539517251381686","pdf_url":null,"source":{"id":"https://openalex.org/S2736409588","display_name":"Big Data & Society","issn_l":"2053-9517","issn":["2053-9517"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Big Data &amp; Society","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1177/20539517251381686","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072261480","display_name":"Megan A. Brown","orcid":"https://orcid.org/0000-0002-1338-8054"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Megan A Brown","raw_affiliation_strings":["University of Michigan","School of Information, University of Michigan, Ann Arbor, MI, USA"],"raw_orcid":"https://orcid.org/0000-0002-1338-8054","affiliations":[{"raw_affiliation_string":"University of Michigan","institution_ids":["https://openalex.org/I27837315"]},{"raw_affiliation_string":"School of Information, University of Michigan, Ann Arbor, MI, USA","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114635414","display_name":"Andrew Gruen","orcid":"https://orcid.org/0009-0006-6516-9730"},"institutions":[{"id":"https://openalex.org/I197817023","display_name":"International Paper (United States)","ror":"https://ror.org/03q6w3828","country_code":"US","type":"company","lineage":["https://openalex.org/I197817023"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Andrew Gruen","raw_affiliation_strings":["Working Paper, LLC, New York, NY, USA"],"raw_orcid":"https://orcid.org/0009-0006-6516-9730","affiliations":[{"raw_affiliation_string":"Working Paper, LLC, New York, NY, USA","institution_ids":["https://openalex.org/I197817023"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114635415","display_name":"Gabe Maldoff","orcid":"https://orcid.org/0009-0003-0867-4771"},"institutions":[{"id":"https://openalex.org/I2799962816","display_name":"University of Maine School of Law","ror":"https://ror.org/04625j688","country_code":"US","type":"education","lineage":["https://openalex.org/I2799962816","https://openalex.org/I2802397601"]},{"id":"https://openalex.org/I7947594","display_name":"University of Maine","ror":"https://ror.org/01adr0w49","country_code":"US","type":"education","lineage":["https://openalex.org/I2802397601","https://openalex.org/I7947594"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gabe Maldoff","raw_affiliation_strings":["University of Maine,","School of Law, University of Maine, Portland, ME, USA"],"raw_orcid":"https://orcid.org/0009-0003-0867-4771","affiliations":[{"raw_affiliation_string":"University of Maine,","institution_ids":["https://openalex.org/I7947594"]},{"raw_affiliation_string":"School of Law, University of Maine, Portland, ME, USA","institution_ids":["https://openalex.org/I2799962816"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069884896","display_name":"Solomon Messing","orcid":"https://orcid.org/0000-0002-0109-4040"},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Solomon Messing","raw_affiliation_strings":["New York University","Center for Social Media and Politics, New York University, New York, NY, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"New York University","institution_ids":["https://openalex.org/I57206974"]},{"raw_affiliation_string":"Center for Social Media and Politics, New York University, New York, NY, USA","institution_ids":["https://openalex.org/I57206974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057729251","display_name":"Zeve Sanderson","orcid":"https://orcid.org/0009-0004-8224-9754"},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zeve Sanderson","raw_affiliation_strings":["New York University","Center for Social Media and Politics, New York University, New York, NY, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"New York University","institution_ids":["https://openalex.org/I57206974"]},{"raw_affiliation_string":"Center for Social Media and Politics, New York University, New York, NY, USA","institution_ids":["https://openalex.org/I57206974"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5020515133","display_name":"Michael Zimmer","orcid":"https://orcid.org/0000-0003-4229-4847"},"institutions":[{"id":"https://openalex.org/I102461120","display_name":"Marquette University","ror":"https://ror.org/04gr4te78","country_code":"US","type":"education","lineage":["https://openalex.org/I102461120"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michael Zimmer","raw_affiliation_strings":["Marquette University","Department of Computer Science, Marquette University, Milwaukee, WI, USA"],"raw_orcid":"https://orcid.org/0000-0003-4229-4847","affiliations":[{"raw_affiliation_string":"Marquette University","institution_ids":["https://openalex.org/I102461120"]},{"raw_affiliation_string":"Department of Computer Science, Marquette University, Milwaukee, WI, USA","institution_ids":["https://openalex.org/I102461120"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5072261480"],"corresponding_institution_ids":["https://openalex.org/I27837315"],"apc_list":{"value":800,"currency":"USD","value_usd":800},"apc_paid":{"value":800,"currency":"USD","value_usd":800},"fwci":46.2598,"has_fulltext":false,"cited_by_count":21,"citation_normalized_percentile":{"value":0.99816012,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":100},"biblio":{"volume":"12","issue":"4","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11937","display_name":"Research Data Management Practices","score":0.18250000476837158,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11937","display_name":"Research Data Management Practices","score":0.18250000476837158,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13976","display_name":"Web visibility and informetrics","score":0.18160000443458557,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.17630000412464142,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.5885000228881836},{"id":"https://openalex.org/keywords/social-media","display_name":"Social media","score":0.3856000006198883},{"id":"https://openalex.org/keywords/web-intelligence","display_name":"Web intelligence","score":0.38100001215934753},{"id":"https://openalex.org/keywords/web-application","display_name":"Web application","score":0.36480000615119934},{"id":"https://openalex.org/keywords/ethical-issues","display_name":"Ethical issues","score":0.35760000348091125},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.3260999917984009},{"id":"https://openalex.org/keywords/data-access","display_name":"Data access","score":0.3066999912261963},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.3046000003814697}],"concepts":[{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.5885000228881836},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.573199987411499},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5619000196456909},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.4327999949455261},{"id":"https://openalex.org/C518677369","wikidata":"https://www.wikidata.org/wiki/Q202833","display_name":"Social media","level":2,"score":0.3856000006198883},{"id":"https://openalex.org/C544335954","wikidata":"https://www.wikidata.org/wiki/Q2553348","display_name":"Web intelligence","level":4,"score":0.38100001215934753},{"id":"https://openalex.org/C118643609","wikidata":"https://www.wikidata.org/wiki/Q189210","display_name":"Web application","level":2,"score":0.36480000615119934},{"id":"https://openalex.org/C2986663376","wikidata":"https://www.wikidata.org/wiki/Q9465","display_name":"Ethical issues","level":2,"score":0.35760000348091125},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.3260999917984009},{"id":"https://openalex.org/C47487241","wikidata":"https://www.wikidata.org/wiki/Q5227230","display_name":"Data access","level":2,"score":0.3066999912261963},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.3046000003814697},{"id":"https://openalex.org/C55587333","wikidata":"https://www.wikidata.org/wiki/Q1133029","display_name":"Engineering ethics","level":1,"score":0.30070000886917114},{"id":"https://openalex.org/C126349790","wikidata":"https://www.wikidata.org/wiki/Q905036","display_name":"Computational sociology","level":2,"score":0.3003999888896942},{"id":"https://openalex.org/C134531850","wikidata":"https://www.wikidata.org/wiki/Q6056040","display_name":"Internet research","level":3,"score":0.29109999537467957},{"id":"https://openalex.org/C108827166","wikidata":"https://www.wikidata.org/wiki/Q175975","display_name":"Internet privacy","level":1,"score":0.29089999198913574},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.28380000591278076},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.2752000093460083},{"id":"https://openalex.org/C3020774429","wikidata":"https://www.wikidata.org/wiki/Q1201886","display_name":"Web of science","level":3,"score":0.2736000120639801},{"id":"https://openalex.org/C2781083858","wikidata":"https://www.wikidata.org/wiki/Q17327049","display_name":"Scientific literature","level":2,"score":0.2623000144958496},{"id":"https://openalex.org/C137822555","wikidata":"https://www.wikidata.org/wiki/Q2587068","display_name":"Information sensitivity","level":2,"score":0.25870001316070557},{"id":"https://openalex.org/C2779494480","wikidata":"https://www.wikidata.org/wiki/Q188728","display_name":"Information Dissemination","level":2,"score":0.25859999656677246},{"id":"https://openalex.org/C123201435","wikidata":"https://www.wikidata.org/wiki/Q456632","display_name":"Information privacy","level":2,"score":0.25450000166893005},{"id":"https://openalex.org/C2776543384","wikidata":"https://www.wikidata.org/wiki/Q593289","display_name":"Information access","level":2,"score":0.2524000108242035}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1177/20539517251381686","is_oa":true,"landing_page_url":"https://doi.org/10.1177/20539517251381686","pdf_url":null,"source":{"id":"https://openalex.org/S2736409588","display_name":"Big Data & Society","issn_l":"2053-9517","issn":["2053-9517"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Big Data &amp; Society","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:38c4d1cf552f4b8eb150743d5daddf84","is_oa":true,"landing_page_url":"https://doaj.org/article/38c4d1cf552f4b8eb150743d5daddf84","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Big Data & Society, Vol 12 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1177/20539517251381686","is_oa":true,"landing_page_url":"https://doi.org/10.1177/20539517251381686","pdf_url":null,"source":{"id":"https://openalex.org/S2736409588","display_name":"Big Data & Society","issn_l":"2053-9517","issn":["2053-9517"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Big Data &amp; Society","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W1529013893","https://openalex.org/W2003733012","https://openalex.org/W2363947341","https://openalex.org/W2547108976","https://openalex.org/W2598912916","https://openalex.org/W2750511870","https://openalex.org/W2773153336","https://openalex.org/W2793785670","https://openalex.org/W2799705784","https://openalex.org/W2898883664","https://openalex.org/W2955221586","https://openalex.org/W3002330681","https://openalex.org/W3020360782","https://openalex.org/W3039560364","https://openalex.org/W3091971295","https://openalex.org/W3122605581","https://openalex.org/W3171690063","https://openalex.org/W3189849087","https://openalex.org/W3212368439","https://openalex.org/W3212464620","https://openalex.org/W3217570815","https://openalex.org/W4205362630","https://openalex.org/W4213348855","https://openalex.org/W4224316062","https://openalex.org/W4224319565","https://openalex.org/W4294190797","https://openalex.org/W4296959790","https://openalex.org/W4312949807","https://openalex.org/W4322500410","https://openalex.org/W4386503771","https://openalex.org/W4387064995","https://openalex.org/W4388218284","https://openalex.org/W4389991891","https://openalex.org/W4390038780","https://openalex.org/W4392014913"],"related_works":[],"abstract_inverted_index":{"Scientists":[0],"across":[1],"disciplines":[2],"often":[3],"use":[4],"data":[5,37,115,156],"from":[6],"the":[7,79,93,100,140,148],"internet":[8],"to":[9,36,52,124,136,143],"conduct":[10,125],"research,":[11],"generating":[12],"valuable":[13],"insights":[14],"about":[15],"human":[16],"behavior.":[17],"However,":[18],"as":[19],"generative":[20],"artificial":[21],"intelligence":[22],"relying":[23],"on":[24],"massive":[25],"text":[26],"corpora":[27],"becomes":[28],"increasingly":[29],"valuable,":[30],"platforms":[31],"have":[32],"greatly":[33],"restricted":[34],"access":[35,157],"through":[38],"official":[39],"channels.":[40],"As":[41],"a":[42,65,128],"result,":[43],"researchers":[44,89,108,121,138],"will":[45],"likely":[46],"engage":[47],"in":[48,71,127],"more":[49],"web":[50,69],"scraping":[51,70,92,126],"collect":[53],"data,":[54],"introducing":[55],"new":[56],"challenges":[57],"and":[58,83,106,113,131,146],"concerns":[59],"for":[60,68,75],"researchers.":[61],"This":[62],"paper":[63],"proposes":[64],"comprehensive":[66],"framework":[67],"social":[72],"science":[73],"research":[74,152],"U.S.-based":[76],"researchers,":[77],"examining":[78],"legal,":[80],"ethical,":[81],"institutional,":[82],"scientific":[84],"factors":[85],"that":[86],"we":[87],"recommend":[88],"consider":[90],"when":[91,105],"web.":[94],"We":[95,118,134],"present":[96],"an":[97],"overview":[98],"of":[99,150],"current":[101],"regulatory":[102],"environment":[103],"impacting":[104],"how":[107],"can":[109],"access,":[110],"collect,":[111],"store,":[112],"share":[114],"via":[116],"scraping.":[117],"then":[119],"provide":[120],"with":[122,139],"recommendations":[123],"scientifically":[129],"legitimate":[130],"ethical":[132],"manner.":[133],"aim":[135],"equip":[137],"relevant":[141],"information":[142],"mitigate":[144],"risks":[145],"maximize":[147],"impact":[149],"their":[151],"amid":[153],"this":[154],"evolving":[155],"landscape.":[158]},"counts_by_year":[{"year":2026,"cited_by_count":19},{"year":2025,"cited_by_count":2}],"updated_date":"2026-06-30T13:55:48.251075","created_date":"2025-11-20T00:00:00"}
