{"id":"https://openalex.org/W2121137265","doi":"https://doi.org/10.3217/jucs-014-11-1893","title":"Recognising Informative Web Page Blocks Using Visual Segmentation for Efficient Information Extraction","display_name":"Recognising Informative Web Page Blocks Using Visual Segmentation for Efficient Information Extraction","publication_year":2020,"publication_date":"2020-04-07","ids":{"openalex":"https://openalex.org/W2121137265","doi":"https://doi.org/10.3217/jucs-014-11-1893","mag":"2121137265"},"language":"en","primary_location":{"id":"pmh:oai:zenodo.org:7000350","is_oa":true,"landing_page_url":"https://zenodo.org/record/7000350","pdf_url":"https://zenodo.org/record/7000350","source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"JUCS - Journal of Universal Computer Science 14((11)) 1893-1910","raw_type":"info:eu-repo/semantics/article"},"type":"article","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://zenodo.org/record/7000350","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072393721","display_name":"Jinbeom Kang","orcid":null},"institutions":[{"id":"https://openalex.org/I4575257","display_name":"Hanyang University","ror":"https://ror.org/046865y68","country_code":"KR","type":"education","lineage":["https://openalex.org/I4575257"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Jinbeom Kang","raw_affiliation_strings":["Hanyang University"],"affiliations":[{"raw_affiliation_string":"Hanyang University","institution_ids":["https://openalex.org/I4575257"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032066880","display_name":"Joongmin Choi","orcid":null},"institutions":[{"id":"https://openalex.org/I4575257","display_name":"Hanyang University","ror":"https://ror.org/046865y68","country_code":"KR","type":"education","lineage":["https://openalex.org/I4575257"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Joongmin Choi","raw_affiliation_strings":["Hanyang University"],"affiliations":[{"raw_affiliation_string":"Hanyang University","institution_ids":["https://openalex.org/I4575257"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5072393721"],"corresponding_institution_ids":["https://openalex.org/I4575257"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":16,"citation_normalized_percentile":{"value":0.00072838,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9797000288963318,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9749000072479248,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7038624286651611},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.601098358631134},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.555812656879425},{"id":"https://openalex.org/keywords/extraction","display_name":"Extraction (chemistry)","score":0.5478203296661377},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.528324544429779},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.509395182132721},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4257616698741913},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.3131450414657593},{"id":"https://openalex.org/keywords/chromatography","display_name":"Chromatography","score":0.08018791675567627},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.058544814586639404}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7038624286651611},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.601098358631134},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.555812656879425},{"id":"https://openalex.org/C4725764","wikidata":"https://www.wikidata.org/wiki/Q844704","display_name":"Extraction (chemistry)","level":2,"score":0.5478203296661377},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.528324544429779},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.509395182132721},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4257616698741913},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3131450414657593},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.08018791675567627},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.058544814586639404}],"mesh":[],"locations_count":3,"locations":[{"id":"pmh:oai:zenodo.org:7000350","is_oa":true,"landing_page_url":"https://zenodo.org/record/7000350","pdf_url":"https://zenodo.org/record/7000350","source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"JUCS - Journal of Universal Computer Science 14((11)) 1893-1910","raw_type":"info:eu-repo/semantics/article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.376.4611","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.376.4611","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.jucs.org/jucs_14_11/recognising_informative_web_page/jucs_14_11_1893_1910_kang.pdf","raw_type":"text"},{"id":"doi:10.3217/jucs-014-11-1893","is_oa":true,"landing_page_url":"https://doi.org/10.3217/jucs-014-11-1893","pdf_url":null,"source":{"id":"https://openalex.org/S4306400660","display_name":"TUGraz OPEN Library (Graz University of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4092182","host_organization_name":"Graz University of Technology","host_organization_lineage":["https://openalex.org/I4092182"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:zenodo.org:7000350","is_oa":true,"landing_page_url":"https://zenodo.org/record/7000350","pdf_url":"https://zenodo.org/record/7000350","source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"JUCS - Journal of Universal Computer Science 14((11)) 1893-1910","raw_type":"info:eu-repo/semantics/article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2121137265.pdf","grobid_xml":"https://content.openalex.org/works/W2121137265.grobid-xml"},"referenced_works_count":14,"referenced_works":["https://openalex.org/W1535788949","https://openalex.org/W1803802947","https://openalex.org/W1978478796","https://openalex.org/W1994382199","https://openalex.org/W2032781560","https://openalex.org/W2049461910","https://openalex.org/W2095680579","https://openalex.org/W2128836931","https://openalex.org/W2134150392","https://openalex.org/W2144630894","https://openalex.org/W2150721933","https://openalex.org/W2154444297","https://openalex.org/W2168358004","https://openalex.org/W2421105961"],"related_works":["https://openalex.org/W2611741382","https://openalex.org/W4379231730","https://openalex.org/W3135843367","https://openalex.org/W2613685774","https://openalex.org/W3112355890","https://openalex.org/W4220741973","https://openalex.org/W4389858081","https://openalex.org/W2377297411","https://openalex.org/W1901649692","https://openalex.org/W4385731361"],"abstract_inverted_index":{"Abstract:":[0],"As":[1],"web":[2,11,67,96],"sites":[3],"are":[4],"getting":[5],"more":[6,16,78,149],"complicated,":[7],"the":[8,24,28,35,43,50,62,125,143,161],"construction":[9],"of":[10,30,101,138,163],"information":[12,37,71,164],"extraction":[13,72,165],"systems":[14],"becomes":[15],"troublesome":[17],"and":[18,93,104,133,142,159],"time-consuming.":[19],"A":[20,136],"common":[21],"theme":[22],"is":[23,38,58],"difficulty":[25],"in":[26,33,65,154],"locating":[27],"segments":[29],"a":[31,66,95,99,118],"page":[32,68,88,97],"which":[34,40,57],"target":[36],"contained,":[39],"we":[41],"call":[42],"informative":[44,63,126,156],"blocks.":[45],"This":[46],"article":[47],"reports":[48],"on":[49,76,82],"Recognising":[51],"Informative":[52],"Page":[53],"Blocks":[54],"algorithm":[55,85],"(RIPB),":[56],"able":[59],"to":[60,91],"identify":[61],"block":[64,89,114,127,157],"so":[69],"that":[70,146],"algorithms":[73],"can":[74],"work":[75],"it":[77],"efficiently.":[79],"RIPB":[80,123,147],"relies":[81],"an":[83],"existing":[84],"for":[86],"vision-based":[87],"segmentation":[90],"analyse":[92],"partition":[94],"into":[98,113],"set":[100],"visual":[102],"blocks,":[103],"then":[105],"groups":[106],"related":[107],"blocks":[108],"with":[109],"similar":[110],"content":[111],"structures":[112],"clusters":[115],"by":[116,129,166],"using":[117,130],"tree":[119,131,134],"edit":[120],"distance":[121],"method.":[122],"recognises":[124],"cluster":[128],"alignment":[132],"matching.":[135],"series":[137],"experiments":[139],"were":[140,145],"performed,":[141],"conclusions":[144],"was":[148],"than":[150],"95":[151],"%":[152],"accurate":[153],"recognising":[155],"clusters,":[158],"improved":[160],"efficiency":[162],"17%.":[167]},"counts_by_year":[{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":3},{"year":2012,"cited_by_count":2}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
