{"id":"https://openalex.org/W2044700791","doi":"https://doi.org/10.1145/2479787.2479792","title":"Cluster-based page segmentation-a fast and precise method for web page pre-processing","display_name":"Cluster-based page segmentation-a fast and precise method for web page pre-processing","publication_year":2013,"publication_date":"2013-06-11","ids":{"openalex":"https://openalex.org/W2044700791","doi":"https://doi.org/10.1145/2479787.2479792","mag":"2044700791"},"language":"en","primary_location":{"id":"doi:10.1145/2479787.2479792","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2479787.2479792","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 3rd International Conference on Web Intelligence, Mining and Semantics","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5083684509","display_name":"Jan Zelen\u00fd","orcid":null},"institutions":[{"id":"https://openalex.org/I60587646","display_name":"Brno University of Technology","ror":"https://ror.org/03613d656","country_code":"CZ","type":"education","lineage":["https://openalex.org/I60587646"]}],"countries":["CZ"],"is_corresponding":true,"raw_author_name":"Jan Zeleny","raw_affiliation_strings":["Brno University of Technology, Brno, Czech Republic","Brno University of Technology. Brno, Czech Republic"],"affiliations":[{"raw_affiliation_string":"Brno University of Technology, Brno, Czech Republic","institution_ids":["https://openalex.org/I60587646"]},{"raw_affiliation_string":"Brno University of Technology. Brno, Czech Republic","institution_ids":["https://openalex.org/I60587646"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040028014","display_name":"Radek Burget","orcid":"https://orcid.org/0000-0001-5233-0456"},"institutions":[{"id":"https://openalex.org/I60587646","display_name":"Brno University of Technology","ror":"https://ror.org/03613d656","country_code":"CZ","type":"education","lineage":["https://openalex.org/I60587646"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Radek Burget","raw_affiliation_strings":["Brno University of Technology, Brno, Czech Republic","Brno University of Technology. Brno, Czech Republic"],"affiliations":[{"raw_affiliation_string":"Brno University of Technology, Brno, Czech Republic","institution_ids":["https://openalex.org/I60587646"]},{"raw_affiliation_string":"Brno University of Technology. Brno, Czech Republic","institution_ids":["https://openalex.org/I60587646"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5083684509"],"corresponding_institution_ids":["https://openalex.org/I60587646"],"apc_list":null,"apc_paid":null,"fwci":1.6179,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.88126516,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"12"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9850000143051147,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9508000016212463,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.833283543586731},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.741235077381134},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.7056961059570312},{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.6989747285842896},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.6216508150100708},{"id":"https://openalex.org/keywords/market-segmentation","display_name":"Market segmentation","score":0.6172648072242737},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.521808385848999},{"id":"https://openalex.org/keywords/web-site","display_name":"Web site","score":0.4704686105251312},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.41911154985427856},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.38019225001335144},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.2953472137451172},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.2732017934322357}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.833283543586731},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.741235077381134},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.7056961059570312},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.6989747285842896},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.6216508150100708},{"id":"https://openalex.org/C125308379","wikidata":"https://www.wikidata.org/wiki/Q363057","display_name":"Market segmentation","level":2,"score":0.6172648072242737},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.521808385848999},{"id":"https://openalex.org/C2984519610","wikidata":"https://www.wikidata.org/wiki/Q35127","display_name":"Web site","level":3,"score":0.4704686105251312},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41911154985427856},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.38019225001335144},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.2953472137451172},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.2732017934322357},{"id":"https://openalex.org/C162853370","wikidata":"https://www.wikidata.org/wiki/Q39809","display_name":"Marketing","level":1,"score":0.0},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2479787.2479792","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2479787.2479792","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 3rd International Conference on Web Intelligence, Mining and Semantics","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W1803802947","https://openalex.org/W1890508656","https://openalex.org/W1970243205","https://openalex.org/W1989338554","https://openalex.org/W1992657934","https://openalex.org/W2015551056","https://openalex.org/W2042970189","https://openalex.org/W2072489225","https://openalex.org/W2117209866","https://openalex.org/W2129595335","https://openalex.org/W2134907429","https://openalex.org/W2160189941","https://openalex.org/W2161675403","https://openalex.org/W2169347997","https://openalex.org/W2625590187","https://openalex.org/W6638151354"],"related_works":["https://openalex.org/W2592395359","https://openalex.org/W2535231171","https://openalex.org/W2045342254","https://openalex.org/W1501331687","https://openalex.org/W2326647871","https://openalex.org/W4205247302","https://openalex.org/W2468652214","https://openalex.org/W2501551404","https://openalex.org/W1504527458","https://openalex.org/W2130144716"],"abstract_inverted_index":{"Segmenting":[0],"a":[1,79,96],"web":[2,77,87],"page":[3,138],"may":[4],"be":[5,103],"one":[6,46],"of":[7,10,39,47,65,75,99,108,128],"initial":[8],"steps":[9],"information":[11],"retrieval":[12],"or":[13,37],"content":[14],"classification":[15],"performed":[16,130],"on":[17,35,73,120,131],"that":[18],"page.":[19],"While":[20],"there":[21],"has":[22],"been":[23],"an":[24,59,85],"extensive":[25],"research":[26],"in":[27,83],"this":[28,94],"area,":[29],"the":[30,40,48,63,115,121,126,133],"approaches":[31],"usually":[32],"focus":[33],"either":[34],"performance":[36,64,100],"quality":[38,49],"results.":[41],"Vision":[42],"based":[43,67,72],"segmentation":[44],"is":[45,71,89],"focused":[50],"methods,":[51],"which":[52,84],"are":[53],"considerably":[54],"slow.":[55],"This":[56],"paper":[57],"proposes":[58],"approach":[60,70],"for":[61,137],"boosting":[62],"vision":[66],"algorithms.":[68],"Our":[69],"concepts":[74],"modern":[76],"and":[78],"very":[80],"common":[81,135],"scenario":[82],"entire":[86],"site":[88,116],"processed":[90],"at":[91],"once.":[92],"In":[93],"scenario,":[95],"great":[97],"amount":[98],"boost":[101],"can":[102],"gained":[104],"by":[105],"isomorphic":[106],"mapping":[107],"previous":[109],"results":[110,127],"gathered":[111],"from":[112],"pages":[113,119],"within":[114],"to":[117],"other":[118],"same":[122],"site.":[123],"We":[124],"provide":[125],"experiments":[129],"VIPS,":[132],"most":[134],"algorithm":[136],"segmentation.":[139]},"counts_by_year":[{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
