{"id":"https://openalex.org/W2890607810","doi":"https://doi.org/10.1007/s11280-018-0634-6","title":"Large-scale holistic approach to Web block classification: assembling the jigsaws of a Web page puzzle","display_name":"Large-scale holistic approach to Web block classification: assembling the jigsaws of a Web page puzzle","publication_year":2018,"publication_date":"2018-09-12","ids":{"openalex":"https://openalex.org/W2890607810","doi":"https://doi.org/10.1007/s11280-018-0634-6","mag":"2890607810"},"language":"en","primary_location":{"id":"doi:10.1007/s11280-018-0634-6","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s11280-018-0634-6","pdf_url":"https://link.springer.com/content/pdf/10.1007/s11280-018-0634-6.pdf","source":{"id":"https://openalex.org/S129236917","display_name":"World Wide Web","issn_l":"1386-145X","issn":["1386-145X","1573-1413"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"World Wide Web","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s11280-018-0634-6.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065777566","display_name":"Andrey Kravchenko","orcid":"https://orcid.org/0000-0002-9066-7743"},"institutions":[{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Andrey Kravchenko","raw_affiliation_strings":["Department of Computer Science, University of Oxford, Oxford, England"],"raw_orcid":"https://orcid.org/0000-0002-9066-7743","affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Oxford, Oxford, England","institution_ids":["https://openalex.org/I40120149"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5065777566"],"corresponding_institution_ids":["https://openalex.org/I40120149"],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":{"value":2390,"currency":"EUR","value_usd":2990},"fwci":1.2333,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.86002393,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"22","issue":"5","first_page":"1999","last_page":"2015"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9758999943733215,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8686853647232056},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.6386039853096008},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.603512704372406},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.5050264000892639},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.4580245912075043},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.40844911336898804},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.0607147216796875},{"id":"https://openalex.org/keywords/cartography","display_name":"Cartography","score":0.04883924126625061}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8686853647232056},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.6386039853096008},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.603512704372406},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.5050264000892639},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.4580245912075043},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.40844911336898804},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0607147216796875},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.04883924126625061},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s11280-018-0634-6","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s11280-018-0634-6","pdf_url":"https://link.springer.com/content/pdf/10.1007/s11280-018-0634-6.pdf","source":{"id":"https://openalex.org/S129236917","display_name":"World Wide Web","issn_l":"1386-145X","issn":["1386-145X","1573-1413"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"World Wide Web","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s11280-018-0634-6","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s11280-018-0634-6","pdf_url":"https://link.springer.com/content/pdf/10.1007/s11280-018-0634-6.pdf","source":{"id":"https://openalex.org/S129236917","display_name":"World Wide Web","issn_l":"1386-145X","issn":["1386-145X","1573-1413"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"World Wide Web","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8400180575","display_name":null,"funder_award_id":"EP/M025268/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2890607810.pdf","grobid_xml":"https://content.openalex.org/works/W2890607810.grobid-xml"},"referenced_works_count":32,"referenced_works":["https://openalex.org/W1486487250","https://openalex.org/W1558832481","https://openalex.org/W1969192670","https://openalex.org/W2000745862","https://openalex.org/W2012575882","https://openalex.org/W2015933107","https://openalex.org/W2029331575","https://openalex.org/W2030908643","https://openalex.org/W2036265926","https://openalex.org/W2045717510","https://openalex.org/W2051141368","https://openalex.org/W2065507241","https://openalex.org/W2083413555","https://openalex.org/W2100184871","https://openalex.org/W2102261150","https://openalex.org/W2119602842","https://openalex.org/W2121137265","https://openalex.org/W2121751007","https://openalex.org/W2125055259","https://openalex.org/W2127498257","https://openalex.org/W2140204390","https://openalex.org/W2143578854","https://openalex.org/W2148317291","https://openalex.org/W2152956989","https://openalex.org/W2160189941","https://openalex.org/W2164565864","https://openalex.org/W2401610261","https://openalex.org/W2464591306","https://openalex.org/W2541143907","https://openalex.org/W2594674417","https://openalex.org/W2787050529","https://openalex.org/W4238669794"],"related_works":["https://openalex.org/W2411679502","https://openalex.org/W2513545296","https://openalex.org/W2144190808","https://openalex.org/W2592441986","https://openalex.org/W67510309","https://openalex.org/W2383869160","https://openalex.org/W3216588747","https://openalex.org/W2110357112","https://openalex.org/W2044968286","https://openalex.org/W2051135816"],"abstract_inverted_index":{"Web":[0,40,45,59,159],"blocks":[1,23,214],"are":[2],"ubiquitous":[3],"across":[4],"the":[5,56,96,101,106,131,138,179,184,198,206,221],"Web.":[6],"Navigation":[7],"menus,":[8],"advertisements,":[9],"headers,":[10],"footers,":[11],"and":[12,44,105,145,212,215],"sidebars":[13],"can":[14,24],"be":[15,25],"found":[16],"almost":[17],"on":[18,65],"any":[19],"website.":[20],"Identifying":[21],"these":[22,117,228],"of":[26,58,68,74,80,93,113,130,147,167,201,210,223,227],"significant":[27],"importance":[28],"for":[29,193,225],"tasks":[30],"such":[31],"as":[32],"wrapper":[33],"induction,":[34],"assistance":[35],"to":[36,55,71,116,125,158,183,220],"visually":[37],"impaired":[38],"people,":[39],"page":[41,132],"topic":[42],"clustering,":[43],"search":[46],"among":[47],"a":[48,127,165,217],"few.":[49],"There":[50],"have":[51],"been":[52],"several":[53],"approaches":[54],"problem":[57,222],"block":[60,135,160,171,229],"classification,":[61],"but":[62],"they":[63],"focused":[64],"specific":[66],"types":[67],"blocks,":[69],"trying":[70],"classify":[72],"all":[73,134],"them":[75],"with":[76,95,142,178,197],"one":[77],"single":[78],"set":[79,92],"features.":[81],"In":[82,120],"our":[83,121,170],"approach":[84,122,157,181],"each":[85,143],"classifier":[86],"has":[87],"its":[88],"own":[89],"unique":[90],"extendable":[91],"features,":[94],"features":[97],"being":[98],"extracted":[99],"through":[100,111,152,164],"declarative-based":[102],"BERyL":[103,185],"language,":[104],"classification":[107,139,161,172,186,209,224],"itself":[108],"is":[109,162],"done":[110],"application":[112],"machine":[114],"learning":[115],"feature":[118],"sets.":[119],"we":[123],"propose":[124,216],"take":[126],"holistic":[128,156,180],"view":[129],"where":[133],"classifiers":[136,149],"in":[137,169],"system":[140,166,173,187],"interact":[141],"other,":[144],"accuracies":[146],"individual":[148,194],"get":[150],"improved":[151],"this":[153],"interaction.":[154],"The":[155,175],"implemented":[163],"constraints":[168],"BERyL.":[174],"evaluation":[176],"results":[177,191],"applied":[182],"achieve":[188],"higher":[189],"F1":[190,200],"than":[192],"non-connected":[195],"classifiers,":[196],"average":[199],"98%.":[202],"We":[203],"also":[204],"consider":[205],"distinction":[207],"between":[208],"domain-independent":[211],"domain-dependent":[213],"large-scale":[218],"solution":[219],"both":[226],"types.":[230]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
