{"id":"https://openalex.org/W1970243205","doi":"https://doi.org/10.1109/icdar.2007.4376990","title":"Layout Based Information Extraction from HTML Documents","display_name":"Layout Based Information Extraction from HTML Documents","publication_year":2007,"publication_date":"2007-09-01","ids":{"openalex":"https://openalex.org/W1970243205","doi":"https://doi.org/10.1109/icdar.2007.4376990","mag":"1970243205"},"language":"en","primary_location":{"id":"doi:10.1109/icdar.2007.4376990","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icdar.2007.4376990","pdf_url":null,"source":{"id":"https://openalex.org/S4210215987","display_name":"Proceedings of the International Conference on Document Analysis and Recognition","issn_l":"1520-5363","issn":["1520-5363","2379-2140"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Ninth International Conference on Document Analysis and Recognition (ICDAR 2007) Vol 2","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5040028014","display_name":"Radek Burget","orcid":"https://orcid.org/0000-0001-5233-0456"},"institutions":[{"id":"https://openalex.org/I60587646","display_name":"Brno University of Technology","ror":"https://ror.org/03613d656","country_code":"CZ","type":"education","lineage":["https://openalex.org/I60587646"]}],"countries":["CZ"],"is_corresponding":true,"raw_author_name":"R. Burget","raw_affiliation_strings":["Faculty of Information Technology, Brno University of Technology, Brno, Czech Republic","Brno University of Technology, Brno#TAB#"],"affiliations":[{"raw_affiliation_string":"Faculty of Information Technology, Brno University of Technology, Brno, Czech Republic","institution_ids":["https://openalex.org/I60587646"]},{"raw_affiliation_string":"Brno University of Technology, Brno#TAB#","institution_ids":["https://openalex.org/I60587646"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5040028014"],"corresponding_institution_ids":["https://openalex.org/I60587646"],"apc_list":null,"apc_paid":null,"fwci":3.3353,"has_fulltext":false,"cited_by_count":21,"citation_normalized_percentile":{"value":0.92590075,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"624","last_page":"628"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.9865999817848206,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9764000177383423,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8581980466842651},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.6918957233428955},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6085506677627563},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.6004116535186768},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5688897371292114},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5567542910575867},{"id":"https://openalex.org/keywords/extraction","display_name":"Extraction (chemistry)","score":0.5034891963005066},{"id":"https://openalex.org/keywords/document-layout-analysis","display_name":"Document layout analysis","score":0.4961460530757904},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.43610674142837524},{"id":"https://openalex.org/keywords/document-structure-description","display_name":"Document Structure Description","score":0.4304264783859253},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.36839616298675537},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3358352482318878},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.21063879132270813},{"id":"https://openalex.org/keywords/xml","display_name":"XML","score":0.18001654744148254},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.15710097551345825},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.07580551505088806}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8581980466842651},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.6918957233428955},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6085506677627563},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.6004116535186768},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5688897371292114},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5567542910575867},{"id":"https://openalex.org/C4725764","wikidata":"https://www.wikidata.org/wiki/Q844704","display_name":"Extraction (chemistry)","level":2,"score":0.5034891963005066},{"id":"https://openalex.org/C72773152","wikidata":"https://www.wikidata.org/wiki/Q5287629","display_name":"Document layout analysis","level":3,"score":0.4961460530757904},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.43610674142837524},{"id":"https://openalex.org/C68699486","wikidata":"https://www.wikidata.org/wiki/Q265904","display_name":"Document Structure Description","level":3,"score":0.4304264783859253},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36839616298675537},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3358352482318878},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.21063879132270813},{"id":"https://openalex.org/C8797682","wikidata":"https://www.wikidata.org/wiki/Q2115","display_name":"XML","level":2,"score":0.18001654744148254},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.15710097551345825},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.07580551505088806},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icdar.2007.4376990","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icdar.2007.4376990","pdf_url":null,"source":{"id":"https://openalex.org/S4210215987","display_name":"Proceedings of the International Conference on Document Analysis and Recognition","issn_l":"1520-5363","issn":["1520-5363","2379-2140"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Ninth International Conference on Document Analysis and Recognition (ICDAR 2007) Vol 2","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W1503682011","https://openalex.org/W1521663168","https://openalex.org/W1553229631","https://openalex.org/W1748877974","https://openalex.org/W1803802947","https://openalex.org/W2012575882","https://openalex.org/W2015933107","https://openalex.org/W2105413446","https://openalex.org/W2134590793","https://openalex.org/W2155758288","https://openalex.org/W6633179905"],"related_works":["https://openalex.org/W1522196789","https://openalex.org/W1971234693","https://openalex.org/W2132006538","https://openalex.org/W1534205747","https://openalex.org/W840083456","https://openalex.org/W128719082","https://openalex.org/W4246755522","https://openalex.org/W4231742017","https://openalex.org/W2062193808","https://openalex.org/W4385504528"],"abstract_inverted_index":{"We":[0],"propose":[1],"a":[2],"method":[3],"of":[4,40,43],"information":[5,15],"extraction":[6,33,68],"from":[7],"HTML":[8],"documents":[9],"based":[10,36],"on":[11,37],"modelling":[12],"the":[13,17,27,32,38,44,57,67],"visual":[14,49],"in":[16],"document.":[18],"A":[19],"page":[20],"segmentation":[21],"algorithm":[22],"is":[23,35,53],"used":[24],"for":[25,66],"detecting":[26],"document":[28],"layout":[29],"and":[30,47,61],"subsequently,":[31],"process":[34],"analysis":[39],"mutual":[41],"positions":[42],"detected":[45],"blocks":[46],"their":[48],"features.":[50],"This":[51],"approach":[52],"more":[54],"robust":[55],"that":[56],"traditional":[58],"DOM-based":[59],"methods":[60],"it":[62],"opens":[63],"new":[64],"possibilities":[65],"task":[69],"specification.":[70]},"counts_by_year":[{"year":2019,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":3},{"year":2014,"cited_by_count":4},{"year":2013,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
