{"id":"https://openalex.org/W4415222182","doi":"https://doi.org/10.1145/3730567.3732913","title":"Somesite I Used To Crawl: Awareness, Agency and Efficacy in Protecting Content Creators From AI Crawlers","display_name":"Somesite I Used To Crawl: Awareness, Agency and Efficacy in Protecting Content Creators From AI Crawlers","publication_year":2025,"publication_date":"2025-10-15","ids":{"openalex":"https://openalex.org/W4415222182","doi":"https://doi.org/10.1145/3730567.3732913"},"language":null,"primary_location":{"id":"doi:10.1145/3730567.3732913","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3730567.3732913","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3730567.3732913","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 ACM Internet Measurement Conference","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3730567.3732913","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5000651880","display_name":"Enze Liu","orcid":"https://orcid.org/0000-0003-4288-8485"},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Enze Liu","raw_affiliation_strings":["UC San Diego, La Jolla, CA, USA"],"affiliations":[{"raw_affiliation_string":"UC San Diego, La Jolla, CA, USA","institution_ids":["https://openalex.org/I36258959"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039158803","display_name":"Erding Luo","orcid":null},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Elisa Luo","raw_affiliation_strings":["UC San Diego, La Jolla, CA, USA"],"affiliations":[{"raw_affiliation_string":"UC San Diego, La Jolla, CA, USA","institution_ids":["https://openalex.org/I36258959"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076606321","display_name":"Shawn Shan","orcid":"https://orcid.org/0009-0005-4324-7817"},"institutions":[{"id":"https://openalex.org/I39422238","display_name":"University of Illinois Chicago","ror":"https://ror.org/02mpq6x41","country_code":"US","type":"education","lineage":["https://openalex.org/I39422238"]},{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shawn Shan","raw_affiliation_strings":["University of Chicago, Chicago, IL, USA"],"affiliations":[{"raw_affiliation_string":"University of Chicago, Chicago, IL, USA","institution_ids":["https://openalex.org/I40347166","https://openalex.org/I39422238"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081366293","display_name":"Geoffrey M. Voelker","orcid":"https://orcid.org/0000-0003-0865-7499"},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Geoffrey M. Voelker","raw_affiliation_strings":["UC San Diego, La Jolla, CA, USA"],"affiliations":[{"raw_affiliation_string":"UC San Diego, La Jolla, CA, USA","institution_ids":["https://openalex.org/I36258959"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108248360","display_name":"Ben Y. Zhao","orcid":"https://orcid.org/0009-0003-8909-0494"},"institutions":[{"id":"https://openalex.org/I39422238","display_name":"University of Illinois Chicago","ror":"https://ror.org/02mpq6x41","country_code":"US","type":"education","lineage":["https://openalex.org/I39422238"]},{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ben Y. Zhao","raw_affiliation_strings":["University of Chicago, Chicago, IL, USA"],"affiliations":[{"raw_affiliation_string":"University of Chicago, Chicago, IL, USA","institution_ids":["https://openalex.org/I40347166","https://openalex.org/I39422238"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5048394730","display_name":"Stefan Savage","orcid":"https://orcid.org/0000-0001-6617-8029"},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Stefan Savage","raw_affiliation_strings":["UC San Diego, La Jolla, CA, USA"],"affiliations":[{"raw_affiliation_string":"UC San Diego, La Jolla, CA, USA","institution_ids":["https://openalex.org/I36258959"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5000651880"],"corresponding_institution_ids":["https://openalex.org/I36258959"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.35138048,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"78","last_page":"99"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9765999913215637,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12034","display_name":"Digital and Cyber Forensics","score":0.9661999940872192,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/web-crawler","display_name":"Web crawler","score":0.8723999857902527},{"id":"https://openalex.org/keywords/agency","display_name":"Agency (philosophy)","score":0.6010000109672546},{"id":"https://openalex.org/keywords/crawling","display_name":"Crawling","score":0.5841000080108643},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4602000117301941},{"id":"https://openalex.org/keywords/blocking","display_name":"Blocking (statistics)","score":0.4487999975681305},{"id":"https://openalex.org/keywords/test","display_name":"Test (biology)","score":0.4058000147342682},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.40560001134872437}],"concepts":[{"id":"https://openalex.org/C13743948","wikidata":"https://www.wikidata.org/wiki/Q45842","display_name":"Web crawler","level":2,"score":0.8723999857902527},{"id":"https://openalex.org/C108170787","wikidata":"https://www.wikidata.org/wiki/Q3951828","display_name":"Agency (philosophy)","level":2,"score":0.6010000109672546},{"id":"https://openalex.org/C100368936","wikidata":"https://www.wikidata.org/wiki/Q1411725","display_name":"Crawling","level":2,"score":0.5841000080108643},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4629000127315521},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.4607999920845032},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4602000117301941},{"id":"https://openalex.org/C144745244","wikidata":"https://www.wikidata.org/wiki/Q4927286","display_name":"Blocking (statistics)","level":2,"score":0.4487999975681305},{"id":"https://openalex.org/C108827166","wikidata":"https://www.wikidata.org/wiki/Q175975","display_name":"Internet privacy","level":1,"score":0.4065000116825104},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.4058000147342682},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.40560001134872437},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.3758000135421753},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.3716999888420105},{"id":"https://openalex.org/C41065033","wikidata":"https://www.wikidata.org/wiki/Q2825412","display_name":"Adversary","level":2,"score":0.35100001096725464},{"id":"https://openalex.org/C148417208","wikidata":"https://www.wikidata.org/wiki/Q4825882","display_name":"Authentication (law)","level":2,"score":0.3481000065803528},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.3255999982357025},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.32170000672340393},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3041999936103821},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.29440000653266907},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.29409998655319214},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.259799987077713},{"id":"https://openalex.org/C93996380","wikidata":"https://www.wikidata.org/wiki/Q44127","display_name":"Server","level":2,"score":0.25920000672340393}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3730567.3732913","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3730567.3732913","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3730567.3732913","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 ACM Internet Measurement Conference","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3730567.3732913","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3730567.3732913","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3730567.3732913","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 ACM Internet Measurement Conference","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5362012875","display_name":"SaTC: CORE: Medium: Digital Forensics for Deep Neural Networks","funder_award_id":"2241303","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8876996369","display_name":null,"funder_award_id":"N00014","funder_id":"https://openalex.org/F4320337345","funder_display_name":"Office of Naval Research"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320332603","display_name":"University of California, San Diego","ror":"https://ror.org/0168r3w48"},{"id":"https://openalex.org/F4320337345","display_name":"Office of Naval Research","ror":"https://ror.org/00rk2pe57"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4415222182.pdf","grobid_xml":"https://content.openalex.org/works/W4415222182.grobid-xml"},"referenced_works_count":21,"referenced_works":["https://openalex.org/W1979290264","https://openalex.org/W1980531291","https://openalex.org/W1987459427","https://openalex.org/W2061751863","https://openalex.org/W2084585486","https://openalex.org/W2110989128","https://openalex.org/W2137568494","https://openalex.org/W2162549927","https://openalex.org/W2955221586","https://openalex.org/W2964268190","https://openalex.org/W2980978493","https://openalex.org/W3130295820","https://openalex.org/W3155048132","https://openalex.org/W3155052204","https://openalex.org/W3210168563","https://openalex.org/W4229440802","https://openalex.org/W4316135693","https://openalex.org/W4391330472","https://openalex.org/W4396615223","https://openalex.org/W4399364455","https://openalex.org/W4411534815"],"related_works":[],"abstract_inverted_index":{"The":[0],"success":[1],"of":[2,15,51,78,126,189],"generative":[3],"AI":[4,180],"relies":[5],"heavily":[6],"on":[7,9],"training":[8],"data":[10],"scraped":[11],"through":[12],"extensive":[13],"crawling":[14],"the":[16,74,98],"Internet,":[17],"a":[18,36,42,49,122],"practice":[19],"that":[20],"has":[21],"raised":[22],"significant":[23],"copyright,":[24],"privacy,":[25],"and":[26,60,76,101,111,121,151,160],"ethical":[27],"concerns.":[28],"While":[29],"few":[30],"measures":[31],"are":[32],"designed":[33],"to":[34,40,72,82,103],"resist":[35],"resource-rich":[37],"adversary":[38],"determined":[39],"scrape":[41],"site,":[43],"crawlers":[44],"can":[45,112],"be":[46,115],"impacted":[47],"by":[48,64,141,167],"range":[50],"existing":[52],"tools":[53,81,107,114,135],"such":[54,108,113],"as":[55,109],"robots.txt,":[56,110,137],"NoAI":[57],"meta":[58],"tags,":[59],"active":[61],"crawler":[62,105,164],"blocking":[63,106],"reverse":[65,168],"proxies.":[66,169],"In":[67],"this":[68],"work,":[69],"we":[70,130],"seek":[71],"understand":[73],"ability":[75],"efficacy":[77,153],"today's":[79],"networking":[80],"protect":[83],"content":[84],"creators":[85],"against":[86,154,179],"AI-related":[87],"crawling.":[88],"For":[89],"targeted":[90,123],"populations":[91],"like":[92,136],"human":[93],"artists,":[94,129],"do":[95],"they":[96,175],"have":[97],"technical":[99,145],"knowledge":[100],"agency":[102,147],"utilize":[104],"effective?":[116],"Using":[117],"large":[118],"scale":[119],"measurements":[120],"user":[124],"study":[125],"203":[127],"professional":[128],"find":[131],"strong":[132],"demand":[133],"for":[134],"but":[138,182],"significantly":[139],"constrained":[140],"critical":[142],"hurdles":[143],"in":[144,148],"awareness,":[146],"deploying":[149],"them,":[150],"limited":[152,172],"unresponsive":[155],"crawlers.":[156],"We":[157],"further":[158],"test":[159],"evaluate":[161],"network":[162],"level":[163],"blockers":[165],"provided":[166],"Despite":[170],"relatively":[171],"deployment":[173],"today,":[174],"offer":[176],"stronger":[177],"protections":[178],"crawlers,":[181],"still":[183],"come":[184],"with":[185],"their":[186],"own":[187],"set":[188],"limitations.":[190]},"counts_by_year":[],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-22T00:00:00"}
