{"id":"https://openalex.org/W2102189859","doi":"https://doi.org/10.1145/1242572.1242583","title":"Towards domain-independent information extraction from web tables","display_name":"Towards domain-independent information extraction from web tables","publication_year":2007,"publication_date":"2007-05-08","ids":{"openalex":"https://openalex.org/W2102189859","doi":"https://doi.org/10.1145/1242572.1242583","mag":"2102189859"},"language":"en","primary_location":{"id":"doi:10.1145/1242572.1242583","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1242572.1242583","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 16th international conference on World Wide Web","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5086781628","display_name":"Wolfgang Gatterbauer","orcid":"https://orcid.org/0000-0002-9614-0504"},"institutions":[{"id":"https://openalex.org/I145847075","display_name":"TU Wien","ror":"https://ror.org/04d836q62","country_code":"AT","type":"education","lineage":["https://openalex.org/I145847075"]}],"countries":["AT"],"is_corresponding":true,"raw_author_name":"Wolfgang Gatterbauer","raw_affiliation_strings":["Vienna University of Technology, Vienna, Austria","Vienna University of technology, Vienna, Austria"],"affiliations":[{"raw_affiliation_string":"Vienna University of Technology, Vienna, Austria","institution_ids":["https://openalex.org/I145847075"]},{"raw_affiliation_string":"Vienna University of technology, Vienna, Austria","institution_ids":["https://openalex.org/I145847075"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020654461","display_name":"Paul Bohunsky","orcid":null},"institutions":[{"id":"https://openalex.org/I145847075","display_name":"TU Wien","ror":"https://ror.org/04d836q62","country_code":"AT","type":"education","lineage":["https://openalex.org/I145847075"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Paul Bohunsky","raw_affiliation_strings":["Vienna University of Technology, Vienna, Austria","Vienna University of technology, Vienna, Austria"],"affiliations":[{"raw_affiliation_string":"Vienna University of Technology, Vienna, Austria","institution_ids":["https://openalex.org/I145847075"]},{"raw_affiliation_string":"Vienna University of technology, Vienna, Austria","institution_ids":["https://openalex.org/I145847075"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111482934","display_name":"Marcus Herzog","orcid":null},"institutions":[{"id":"https://openalex.org/I145847075","display_name":"TU Wien","ror":"https://ror.org/04d836q62","country_code":"AT","type":"education","lineage":["https://openalex.org/I145847075"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Marcus Herzog","raw_affiliation_strings":["Vienna University of Technology, Vienna, Austria","Vienna University of technology, Vienna, Austria"],"affiliations":[{"raw_affiliation_string":"Vienna University of Technology, Vienna, Austria","institution_ids":["https://openalex.org/I145847075"]},{"raw_affiliation_string":"Vienna University of technology, Vienna, Austria","institution_ids":["https://openalex.org/I145847075"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081670164","display_name":"Bernhard Kr\u00fcpl","orcid":null},"institutions":[{"id":"https://openalex.org/I145847075","display_name":"TU Wien","ror":"https://ror.org/04d836q62","country_code":"AT","type":"education","lineage":["https://openalex.org/I145847075"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Bernhard Kr\u00fcpl","raw_affiliation_strings":["Vienna University of Technology, Vienna, Austria","Vienna University of technology, Vienna, Austria"],"affiliations":[{"raw_affiliation_string":"Vienna University of Technology, Vienna, Austria","institution_ids":["https://openalex.org/I145847075"]},{"raw_affiliation_string":"Vienna University of technology, Vienna, Austria","institution_ids":["https://openalex.org/I145847075"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040508945","display_name":"Bernhard Pollak","orcid":null},"institutions":[{"id":"https://openalex.org/I145847075","display_name":"TU Wien","ror":"https://ror.org/04d836q62","country_code":"AT","type":"education","lineage":["https://openalex.org/I145847075"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Bernhard Pollak","raw_affiliation_strings":["Vienna University of Technology, Vienna, Austria","Vienna University of technology, Vienna, Austria"],"affiliations":[{"raw_affiliation_string":"Vienna University of Technology, Vienna, Austria","institution_ids":["https://openalex.org/I145847075"]},{"raw_affiliation_string":"Vienna University of technology, Vienna, Austria","institution_ids":["https://openalex.org/I145847075"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5086781628"],"corresponding_institution_ids":["https://openalex.org/I145847075"],"apc_list":null,"apc_paid":null,"fwci":48.9563,"has_fulltext":false,"cited_by_count":228,"citation_normalized_percentile":{"value":0.99805338,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"71","last_page":"80"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9897000193595886,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9866999983787537,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7812799215316772},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.5315406322479248},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4983954429626465},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4792059361934662},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.3448389172554016},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11373627185821533}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7812799215316772},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.5315406322479248},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4983954429626465},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4792059361934662},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3448389172554016},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11373627185821533},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/1242572.1242583","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1242572.1242583","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 16th international conference on World Wide Web","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.69.5068","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.69.5068","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.dbai.tuwien.ac.at/user/pollak/paper790.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W141724566","https://openalex.org/W158800878","https://openalex.org/W1493490255","https://openalex.org/W1509595041","https://openalex.org/W1542671304","https://openalex.org/W1558832481","https://openalex.org/W1566513354","https://openalex.org/W1569639771","https://openalex.org/W1595439448","https://openalex.org/W1748877974","https://openalex.org/W1909019684","https://openalex.org/W1986398135","https://openalex.org/W1988217119","https://openalex.org/W2004193186","https://openalex.org/W2042448356","https://openalex.org/W2052198547","https://openalex.org/W2052889856","https://openalex.org/W2078206655","https://openalex.org/W2092772700","https://openalex.org/W2093559286","https://openalex.org/W2095680579","https://openalex.org/W2096031153","https://openalex.org/W2096496923","https://openalex.org/W2102350406","https://openalex.org/W2103158282","https://openalex.org/W2103931177","https://openalex.org/W2104576390","https://openalex.org/W2121871415","https://openalex.org/W2124576009","https://openalex.org/W2125570474","https://openalex.org/W2128341918","https://openalex.org/W2131240202","https://openalex.org/W2133669904","https://openalex.org/W2136379584","https://openalex.org/W2143309843","https://openalex.org/W2151825758","https://openalex.org/W2160807665","https://openalex.org/W2166686713"],"related_works":["https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2382290278","https://openalex.org/W2350741829","https://openalex.org/W2130043461","https://openalex.org/W1901649692","https://openalex.org/W2165268584"],"abstract_inverted_index":{"Traditionally,":[0],"information":[1,49,81,91],"extraction":[2,50],"from":[3,51,58,129],"web":[4,32,52,63,76],"tables":[5,33,53],"has":[6],"focused":[7],"on":[8,17,82],"small,":[9],"more":[10],"or":[11],"less":[12],"homogeneous":[13],"corpora,":[14],"often":[15],"based":[16],"assumptions":[18],"about":[19,103],"the":[20,45,59,69,80,83,96,119,130],"use":[21],"of":[22,27,31,47,62,68,125],"&amp;lt;table&amp;gt;":[23],"tags.":[24],"A":[25],"multitude":[26],"different":[28],"HTML":[29],"implementations":[30],"make":[34],"these":[35],"approaches":[36],"difficult":[37],"to":[38,65,78,94],"scale.":[39],"In":[40],"this":[41,115],"paper,":[42],"we":[43],"approach":[44,116],"problem":[46],"domain-independent":[48],"by":[54,75,99],"shifting":[55],"our":[56],"attention":[57],"tree-based":[60],"representation":[61],"pages":[64],"a":[66,112,122],"variation":[67],"two-dimensional":[70],"visual":[71],"box":[72],"model":[73],"used":[74],"browsers":[77],"display":[79],"screen.":[84],"The":[85],"thereby":[86],"obtained":[87],"topological":[88],"and":[89,105],"style":[90],"allows":[92],"us":[93],"fill":[95],"gap":[97],"created":[98],"missing":[100],"domain-specific":[101],"knowledge":[102,127],"content":[104],"table":[106],"templates.":[107],"We":[108],"believe":[109],"that,":[110],"in":[111],"future":[113],"step,":[114],"can":[117],"become":[118],"basis":[120],"for":[121],"new":[123],"way":[124],"large-scale":[126],"acquisition":[128],"current":[131],"\u201cVisual":[132],"Web.\u201d":[133]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":7},{"year":2021,"cited_by_count":8},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":6},{"year":2018,"cited_by_count":9},{"year":2017,"cited_by_count":11},{"year":2016,"cited_by_count":18},{"year":2015,"cited_by_count":13},{"year":2014,"cited_by_count":20},{"year":2013,"cited_by_count":26},{"year":2012,"cited_by_count":10}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
