{"id":"https://openalex.org/W1889700700","doi":"https://doi.org/10.17562/pb-45-2","title":"A Flexible Table Parsing Approach","display_name":"A Flexible Table Parsing Approach","publication_year":2012,"publication_date":"2012-06-30","ids":{"openalex":"https://openalex.org/W1889700700","doi":"https://doi.org/10.17562/pb-45-2","mag":"1889700700"},"language":"en","primary_location":{"id":"doi:10.17562/pb-45-2","is_oa":true,"landing_page_url":"https://doi.org/10.17562/pb-45-2","pdf_url":"https://www.polibits.cidetec.ipn.mx/ojs/index.php/polibits/article/view/45-2/1742","source":{"id":"https://openalex.org/S4210186437","display_name":"Polibits","issn_l":"1870-9044","issn":["1870-9044","2395-8618"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Polibits","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://www.polibits.cidetec.ipn.mx/ojs/index.php/polibits/article/view/45-2/1742","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5044245720","display_name":"Frank Schilder","orcid":"https://orcid.org/0000-0001-8227-5099"},"institutions":[{"id":"https://openalex.org/I68384125","display_name":"Thomson Reuters (United States)","ror":"https://ror.org/00m7gt169","country_code":"US","type":"company","lineage":["https://openalex.org/I68384125"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Frank Schilder","raw_affiliation_strings":["Thomson Reuters, Corporate Research and Development"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Thomson Reuters, Corporate Research and Development","institution_ids":["https://openalex.org/I68384125"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072716945","display_name":"Ravi Kondadadi","orcid":null},"institutions":[{"id":"https://openalex.org/I68384125","display_name":"Thomson Reuters (United States)","ror":"https://ror.org/00m7gt169","country_code":"US","type":"company","lineage":["https://openalex.org/I68384125"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ravi Kondadadi","raw_affiliation_strings":["Thomson Reuters, Corporate Research and Development"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Thomson Reuters, Corporate Research and Development","institution_ids":["https://openalex.org/I68384125"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038239762","display_name":"Yana Kadiyska","orcid":null},"institutions":[{"id":"https://openalex.org/I68384125","display_name":"Thomson Reuters (United States)","ror":"https://ror.org/00m7gt169","country_code":"US","type":"company","lineage":["https://openalex.org/I68384125"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yana Kadiyska","raw_affiliation_strings":["Thomson Reuters Fixed Income, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Thomson Reuters Fixed Income, USA","institution_ids":["https://openalex.org/I68384125"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.12783062,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"45","issue":null,"first_page":"13","last_page":"19"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10742","display_name":"Peer-to-Peer Network Technologies","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.7516810894012451},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6775634288787842},{"id":"https://openalex.org/keywords/table","display_name":"Table (database)","score":0.6615031361579895},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.48139098286628723},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4576396942138672},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.42868950963020325},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.32594743371009827},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.20231065154075623}],"concepts":[{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.7516810894012451},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6775634288787842},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.6615031361579895},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.48139098286628723},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4576396942138672},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.42868950963020325},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.32594743371009827},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.20231065154075623}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.17562/pb-45-2","is_oa":true,"landing_page_url":"https://doi.org/10.17562/pb-45-2","pdf_url":"https://www.polibits.cidetec.ipn.mx/ojs/index.php/polibits/article/view/45-2/1742","source":{"id":"https://openalex.org/S4210186437","display_name":"Polibits","issn_l":"1870-9044","issn":["1870-9044","2395-8618"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Polibits","raw_type":"journal-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.401.5177","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.401.5177","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://polibits.gelbukh.com/2012_45/A Flexible Table Parsing Approach.pdf","raw_type":"text"},{"id":"pmh:oai:redalyc.org:402640459003","is_oa":false,"landing_page_url":"https://www.redalyc.org/articulo.oa?id=402640459003","pdf_url":null,"source":{"id":"https://openalex.org/S4377196100","display_name":"Redalyc (Universidad Aut\u00f3noma del Estado de M\u00e9xico)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I179647637","host_organization_name":"Universidad Aut\u00f3noma del Estado de M\u00e9xico","host_organization_lineage":["https://openalex.org/I179647637"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Polibits (M\u00e9xico) Vol.45","raw_type":"art\u00edculo cient\u00edfico"}],"best_oa_location":{"id":"doi:10.17562/pb-45-2","is_oa":true,"landing_page_url":"https://doi.org/10.17562/pb-45-2","pdf_url":"https://www.polibits.cidetec.ipn.mx/ojs/index.php/polibits/article/view/45-2/1742","source":{"id":"https://openalex.org/S4210186437","display_name":"Polibits","issn_l":"1870-9044","issn":["1870-9044","2395-8618"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Polibits","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Decent work and economic growth","id":"https://metadata.un.org/sdg/8","score":0.4300000071525574}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W1889700700.pdf","grobid_xml":"https://content.openalex.org/works/W1889700700.grobid-xml"},"referenced_works_count":14,"referenced_works":["https://openalex.org/W1512098439","https://openalex.org/W1627331591","https://openalex.org/W1967830139","https://openalex.org/W2034797903","https://openalex.org/W2038248725","https://openalex.org/W2092772700","https://openalex.org/W2093559286","https://openalex.org/W2102189859","https://openalex.org/W2102733302","https://openalex.org/W2405686381","https://openalex.org/W2496185699","https://openalex.org/W4244046749","https://openalex.org/W4250847188","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W579810227","https://openalex.org/W2952780262","https://openalex.org/W2979495269","https://openalex.org/W2392917763","https://openalex.org/W2948670949","https://openalex.org/W4288047943","https://openalex.org/W4221163897","https://openalex.org/W4381248170","https://openalex.org/W2817971408","https://openalex.org/W3189621521"],"abstract_inverted_index":{"Relational":[0],"data":[1,76,90],"is":[2,60,100],"often":[3],"encoded":[4],"in":[5,75,105],"tables.":[6],"Tables":[7],"are":[8,25,34],"easy":[9],"to":[10,16,41],"read":[11],"by":[12],"humans,":[13],"but":[14],"difficult":[15],"interpret":[17],"automatically.":[18],"In":[19],"cases":[20],"where":[21,32],"table":[22,44,55,70,83,95,98],"layout":[23],"cues":[24],"not":[26],"obtainable":[27],"(missing":[28],"HTML":[29],"tags)":[30],"or":[31],"columns":[33],"distorted":[35],"(by":[36],"copying":[37],"from":[38,88],"a":[39,53,63,89],"spreadsheet":[40],"text)":[42],"previous":[43],"extraction":[45],"approaches":[46],"run":[47],"into":[48],"problems.":[49],"This":[50],"paper":[51],"introduces":[52],"novel":[54],"parsing":[56,99],"approach.":[57],"Our":[58],"approach":[59],"based":[61],"on":[62],"set":[64],"of":[65],"simple":[66],"assumptions:":[67],"(a)":[68],"every":[69,82],"can":[71,84],"be":[72,85],"split":[73],"up":[74],"cells":[77],"and":[78,80],"headers,":[79],"(b)":[81],"parsed":[86],"beginning":[87],"cell":[91],"utilizing":[92],"the":[93],"overall":[94],"structure.":[96],"The":[97],"defined":[101],"as":[102],"\"table":[103],"flattening\"":[104],"this":[106],"paper.":[107]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
