{"id":"https://openalex.org/W4386001693","doi":"https://doi.org/10.1145/3616849","title":"Scraping Relevant Images from Web Pages without Download","display_name":"Scraping Relevant Images from Web Pages without Download","publication_year":2023,"publication_date":"2023-08-19","ids":{"openalex":"https://openalex.org/W4386001693","doi":"https://doi.org/10.1145/3616849"},"language":"en","primary_location":{"id":"doi:10.1145/3616849","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3616849","pdf_url":null,"source":{"id":"https://openalex.org/S131231701","display_name":"ACM Transactions on the Web","issn_l":"1559-1131","issn":["1559-1131","1559-114X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on the Web","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078816512","display_name":"Erdin\u00e7 Uzun","orcid":"https://orcid.org/0000-0003-4351-2244"},"institutions":[{"id":"https://openalex.org/I125454184","display_name":"Tekirda\u011f Nam\u0131k Kemal University","ror":"https://ror.org/01a0mk874","country_code":"TR","type":"education","lineage":["https://openalex.org/I125454184"]}],"countries":["TR"],"is_corresponding":true,"raw_author_name":"Erdin\u00e7 Uzun","raw_affiliation_strings":["Tekirda\u011f Nam\u0131k Kemal University, Turkey"],"raw_orcid":"https://orcid.org/0000-0003-4351-2244","affiliations":[{"raw_affiliation_string":"Tekirda\u011f Nam\u0131k Kemal University, Turkey","institution_ids":["https://openalex.org/I125454184"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5078816512"],"corresponding_institution_ids":["https://openalex.org/I125454184"],"apc_list":null,"apc_paid":null,"fwci":1.7496,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.88020211,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"18","issue":"1","first_page":"1","last_page":"27"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9836000204086304,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9491999745368958,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8512736558914185},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.6530069708824158},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.588240385055542},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5717065930366516},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.5322017073631287},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.489795446395874},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4067786931991577},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3587566614151001},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.35591939091682434},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.35089045763015747}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8512736558914185},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.6530069708824158},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.588240385055542},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5717065930366516},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.5322017073631287},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.489795446395874},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4067786931991577},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3587566614151001},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.35591939091682434},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.35089045763015747},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3616849","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3616849","pdf_url":null,"source":{"id":"https://openalex.org/S131231701","display_name":"ACM Transactions on the Web","issn_l":"1559-1131","issn":["1559-1131","1559-114X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on the Web","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":46,"referenced_works":["https://openalex.org/W1559557735","https://openalex.org/W1647671624","https://openalex.org/W2014858859","https://openalex.org/W2037959103","https://openalex.org/W2040075907","https://openalex.org/W2049488566","https://openalex.org/W2063758007","https://openalex.org/W2065568440","https://openalex.org/W2120101509","https://openalex.org/W2124157324","https://openalex.org/W2129595335","https://openalex.org/W2153072229","https://openalex.org/W2168358004","https://openalex.org/W2253768319","https://openalex.org/W2293417614","https://openalex.org/W2340809461","https://openalex.org/W2467373585","https://openalex.org/W2514393974","https://openalex.org/W2543315719","https://openalex.org/W2612984812","https://openalex.org/W2741613270","https://openalex.org/W2808777947","https://openalex.org/W2912318878","https://openalex.org/W2912637920","https://openalex.org/W2913410345","https://openalex.org/W2921404976","https://openalex.org/W2962717349","https://openalex.org/W2964079897","https://openalex.org/W2969078176","https://openalex.org/W2981506571","https://openalex.org/W2982332362","https://openalex.org/W2993537585","https://openalex.org/W3006281832","https://openalex.org/W3008881932","https://openalex.org/W3104726005","https://openalex.org/W3157731560","https://openalex.org/W3164753811","https://openalex.org/W3174641682","https://openalex.org/W3175234726","https://openalex.org/W3194566690","https://openalex.org/W4210397192","https://openalex.org/W4231080135","https://openalex.org/W4297816490","https://openalex.org/W4297944103","https://openalex.org/W4313635839","https://openalex.org/W4316042130"],"related_works":["https://openalex.org/W2366107444","https://openalex.org/W4388145910","https://openalex.org/W1976205134","https://openalex.org/W2381570729","https://openalex.org/W4248336175","https://openalex.org/W3009369890","https://openalex.org/W2031260042","https://openalex.org/W2391445434","https://openalex.org/W4312490297","https://openalex.org/W2062212388"],"abstract_inverted_index":{"Automatically":[0],"scraping":[1,26,281],"relevant":[2,120,161,252],"images":[3,150,253,271],"from":[4,108,137,151,204],"web":[5,25,106,214,232,247,280],"pages":[6,107,114,233,248],"is":[7,37,197,285],"an":[8,85,221],"error-prone":[9],"and":[10,39,54,61,68,91,100,111,173,262,272],"time-consuming":[11,69],"task,":[12],"leading":[13],"experts":[14],"to":[15,70,118,128,244,249],"prefer":[16],"manually":[17],"preparing":[18],"extraction":[19,44,188],"patterns":[20],"for":[21,115,169,226,254],"a":[22,47,78,93,109,116,130,146,200,205,240],"website.":[23,236],"Existing":[24],"tools":[27],"are":[28],"built":[29],"on":[30,134,287],"these":[31,74,126],"patterns.":[32],"However,":[33],"this":[34],"manual":[35],"approach":[36,80,103,123,219,258],"laborious":[38],"requires":[40],"specialized":[41],"knowledge.":[42],"Automatic":[43],"approaches,":[45,189],"while":[46,97],"potential":[48],"solution,":[49],"require":[50,84],"large":[51,206],"training":[52,89,207],"datasets":[53],"numerous":[55],"features,":[56],"including":[57],"width,":[58],"height,":[59],"pixels,":[60],"file":[62],"size,":[63],"that":[64,81,193,239],"can":[65,194,273],"be":[66,195,274],"difficult":[67],"obtain.":[71],"To":[72],"address":[73],"challenges,":[75],"we":[76,144],"propose":[77],"semi-automatic":[79],"does":[82],"not":[83,266],"expert,":[86],"utilizes":[87],"small":[88],"datasets,":[90],"has":[92],"low":[94],"error":[95],"rate":[96],"saving":[98],"time":[99,261],"storage.":[101],"Our":[102,257],"involves":[104],"clustering":[105],"website":[110],"suggesting":[112],"several":[113,165],"non-expert":[117,241],"annotate":[119],"images.":[121,162],"The":[122],"then":[124],"uses":[125],"annotations":[127],"construct":[129],"learning":[131,167,201],"model":[132,202],"based":[133,286],"textual":[135,288],"data":[136],"the":[138,142,177,181,190,251,268],"HTML":[139],"elements.":[140],"In":[141,216],"experiments,":[143],"used":[145],"dataset":[147,208],"of":[148,210,224,270],"635,015":[149],"200":[152,227,255],"news":[153],"websites,":[154],"each":[155],"containing":[156],"100":[157],"pages,":[158],"with":[159,199,229],"22,632":[160],"When":[163,185],"comparing":[164],"machine":[166],"methods":[168],"both":[170],"automatic":[171,187],"approaches":[172],"our":[174,218],"proposed":[175],"approach,":[176],"AdaBoost":[178],"method":[179],"yields":[180],"best":[182,191],"performance":[183],"results.":[184],"using":[186],"f-Measure":[192,223],"achieved":[196,220],"0.805":[198],"constructed":[203],"consisting":[209],"120":[211],"websites":[212,228],"(12,000":[213],"pages).":[215],"contrast,":[217],"average":[222],"0.958":[225],"only":[230,242],"six":[231],"annotated":[234],"per":[235],"This":[237],"means":[238],"needs":[243],"examine":[245],"1,200":[246],"determine":[250],"websites.":[256],"also":[259],"saves":[260],"storage":[263],"space":[264],"by":[265],"requiring":[267],"download":[269],"easily":[275],"integrated":[276],"into":[277],"currently":[278],"available":[279],"tools,":[282],"because":[283],"it":[284],"data.":[289]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
