{"id":"https://openalex.org/W2099136948","doi":"https://doi.org/10.1109/cit.2004.1357214","title":"Extraction and integration information in HTML tables","display_name":"Extraction and integration information in HTML tables","publication_year":2004,"publication_date":"2004-12-23","ids":{"openalex":"https://openalex.org/W2099136948","doi":"https://doi.org/10.1109/cit.2004.1357214","mag":"2099136948"},"language":"en","primary_location":{"id":"doi:10.1109/cit.2004.1357214","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cit.2004.1357214","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The Fourth International Conference onComputer and Information Technology, 2004. CIT '04.","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100678044","display_name":"Shijun Li","orcid":"https://orcid.org/0000-0002-4495-0732"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shijun Li","raw_affiliation_strings":["School of Computer, Wuhan University of China, China","School of Computer, Wuhan university, China"],"affiliations":[{"raw_affiliation_string":"School of Computer, Wuhan University of China, China","institution_ids":["https://openalex.org/I37461747"]},{"raw_affiliation_string":"School of Computer, Wuhan university, China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030633869","display_name":"Zhiyong Peng","orcid":"https://orcid.org/0000-0001-5441-1292"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]},{"id":"https://openalex.org/I4391768271","display_name":"State Key Laboratory of Software Engineering","ror":"https://ror.org/01z3jn402","country_code":null,"type":"facility","lineage":["https://openalex.org/I37461747","https://openalex.org/I4391768271"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiyong Peng","raw_affiliation_strings":["State Key Laboratory of Software Engineering, Wuhan University of China, China","Wuhan Univ.,"],"affiliations":[{"raw_affiliation_string":"State Key Laboratory of Software Engineering, Wuhan University of China, China","institution_ids":["https://openalex.org/I37461747","https://openalex.org/I4391768271"]},{"raw_affiliation_string":"Wuhan Univ.,","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5085983836","display_name":"Mengchi Liu","orcid":"https://orcid.org/0000-0002-8245-2355"},"institutions":[{"id":"https://openalex.org/I67031392","display_name":"Carleton University","ror":"https://ror.org/02qtvee93","country_code":"CA","type":"education","lineage":["https://openalex.org/I67031392"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Mengchi Liu","raw_affiliation_strings":["School of Computer Science, Carleton University, Ottawa, Canada","(Carleton University)"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Carleton University, Ottawa, Canada","institution_ids":["https://openalex.org/I67031392"]},{"raw_affiliation_string":"(Carleton University)","institution_ids":["https://openalex.org/I67031392"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100678044"],"corresponding_institution_ids":["https://openalex.org/I37461747"],"apc_list":null,"apc_paid":null,"fwci":3.1563,"has_fulltext":false,"cited_by_count":15,"citation_normalized_percentile":{"value":0.9290621,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"315","last_page":"320"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9926000237464905,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9857000112533569,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8643194437026978},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6896114945411682},{"id":"https://openalex.org/keywords/html-element","display_name":"HTML element","score":0.5968820452690125},{"id":"https://openalex.org/keywords/disk-formatting","display_name":"Disk formatting","score":0.5878927111625671},{"id":"https://openalex.org/keywords/schema","display_name":"Schema (genetic algorithms)","score":0.5362865924835205},{"id":"https://openalex.org/keywords/html","display_name":"HTML","score":0.46338900923728943},{"id":"https://openalex.org/keywords/information-integration","display_name":"Information integration","score":0.4195975661277771},{"id":"https://openalex.org/keywords/data-integration","display_name":"Data integration","score":0.41714999079704285},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.41371262073516846},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.27262967824935913},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.24193355441093445}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8643194437026978},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6896114945411682},{"id":"https://openalex.org/C81639021","wikidata":"https://www.wikidata.org/wiki/Q179551","display_name":"HTML element","level":3,"score":0.5968820452690125},{"id":"https://openalex.org/C88006597","wikidata":"https://www.wikidata.org/wiki/Q690117","display_name":"Disk formatting","level":2,"score":0.5878927111625671},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.5362865924835205},{"id":"https://openalex.org/C138708601","wikidata":"https://www.wikidata.org/wiki/Q8811","display_name":"HTML","level":3,"score":0.46338900923728943},{"id":"https://openalex.org/C33326189","wikidata":"https://www.wikidata.org/wiki/Q17092450","display_name":"Information integration","level":2,"score":0.4195975661277771},{"id":"https://openalex.org/C72634772","wikidata":"https://www.wikidata.org/wiki/Q386824","display_name":"Data integration","level":2,"score":0.41714999079704285},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.41371262073516846},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.27262967824935913},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.24193355441093445},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cit.2004.1357214","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cit.2004.1357214","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The Fourth International Conference onComputer and Information Technology, 2004. CIT '04.","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W1528213927","https://openalex.org/W1553019137","https://openalex.org/W1602270052","https://openalex.org/W1607871842","https://openalex.org/W1829475407","https://openalex.org/W2004193186","https://openalex.org/W2075043418","https://openalex.org/W2097456972","https://openalex.org/W2127767383","https://openalex.org/W2148210463","https://openalex.org/W6633154970","https://openalex.org/W6674996676","https://openalex.org/W6681973738"],"related_works":["https://openalex.org/W4255056669","https://openalex.org/W2019215039","https://openalex.org/W622138672","https://openalex.org/W2913428691","https://openalex.org/W598961908","https://openalex.org/W2508179278","https://openalex.org/W4243796650","https://openalex.org/W2992855911","https://openalex.org/W169337252","https://openalex.org/W2088084743"],"abstract_inverted_index":{"A":[0],"large":[1],"amount":[2],"of":[3,57,81,90,126,140],"information":[4,31,39,85],"available":[5],"on":[6],"the":[7,54,69,79,88,95,112,120,124,138,146,151,155,159,168],"Web":[8],"is":[9,40],"formatted":[10],"in":[11,32,72,83,162],"HTML":[12,33,58,63,73,91,128,143],"tables,":[13,59],"which":[14],"are":[15,19],"mainly":[16],"presentation-oriented":[17],"and":[18,36,60,77,98,150,157],"not":[20],"suited":[21],"for":[22,116],"database":[23],"applications.":[24],"As":[25],"a":[26,41,48],"result,":[27],"how":[28],"to":[29,86,106,130,153,167],"capture":[30],"tables":[34,74,129],"semantically":[35],"integrate":[37],"relevant":[38],"challenge.":[42],"In":[43],"this":[44],"paper,":[45],"we":[46],"present":[47],"new":[49],"approach":[50],"that":[51],"automatically":[52,67],"captures":[53,68],"semantic":[55,114,148],"hierarchies":[56],"semi-automatically":[61],"integrates":[62,137],"tables.":[64,92],"It":[65],"first":[66],"attribute-value":[70],"pairs":[71],"by":[75,102],"normalization,":[76],"introduces":[78],"notion":[80],"eigenvalue":[82],"formatting":[84],"recognize":[87],"headings":[89],"After":[93],"generating":[94],"global":[96,99,118,133,169],"concepts":[97],"schema":[100,166],"manually":[101],"defining":[103],"what":[104],"data":[105,139],"be":[107],"integrated,":[108],"it":[109,136],"then":[110],"learns":[111],"lexical":[113,147],"set":[115],"each":[117,141,164],"concept,":[119],"contexts":[121,152],"via":[122],"labelling":[123],"attributes":[125],"example":[127],"their":[131],"corresponding":[132],"concept.":[134],"Finally,":[135],"source":[142,165],"table":[144],"using":[145],"sets":[149],"eliminate":[154],"conflicts":[156],"solve":[158],"nondeterministic":[160],"problems":[161],"mapping":[163],"schema.":[170]},"counts_by_year":[{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":2},{"year":2012,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
