{"id":"https://openalex.org/W2913991821","doi":"https://doi.org/10.1007/s10032-019-00317-0","title":"A framework for information extraction from tables in biomedical literature","display_name":"A framework for information extraction from tables in biomedical literature","publication_year":2019,"publication_date":"2019-02-15","ids":{"openalex":"https://openalex.org/W2913991821","doi":"https://doi.org/10.1007/s10032-019-00317-0","mag":"2913991821"},"language":"en","primary_location":{"id":"doi:10.1007/s10032-019-00317-0","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10032-019-00317-0","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10032-019-00317-0.pdf","source":{"id":"https://openalex.org/S90108747","display_name":"International Journal on Document Analysis and Recognition (IJDAR)","issn_l":"1433-2825","issn":["1433-2825","1433-2833"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal on Document Analysis and Recognition (IJDAR)","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s10032-019-00317-0.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Nikola Milosevic","orcid":"https://orcid.org/0000-0003-2706-9676"},"institutions":[{"id":"https://openalex.org/I28407311","display_name":"University of Manchester","ror":"https://ror.org/027m9bs27","country_code":"GB","type":"education","lineage":["https://openalex.org/I28407311"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Nikola Milosevic","raw_affiliation_strings":["School of Computer Science, University of Manchester, Manchester, M13 9PL, UK"],"raw_orcid":"https://orcid.org/0000-0003-2706-9676","affiliations":[{"raw_affiliation_string":"School of Computer Science, University of Manchester, Manchester, M13 9PL, UK","institution_ids":["https://openalex.org/I28407311"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Cassie Gregson","orcid":null},"institutions":[{"id":"https://openalex.org/I105036370","display_name":"AstraZeneca (United Kingdom)","ror":"https://ror.org/04r9x1a08","country_code":"GB","type":"company","lineage":["https://openalex.org/I105036370"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Cassie Gregson","raw_affiliation_strings":["AstraZeneca plc, Cambridge, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"AstraZeneca plc, Cambridge, UK","institution_ids":["https://openalex.org/I105036370"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Robert Hernandez","orcid":null},"institutions":[{"id":"https://openalex.org/I105036370","display_name":"AstraZeneca (United Kingdom)","ror":"https://ror.org/04r9x1a08","country_code":"GB","type":"company","lineage":["https://openalex.org/I105036370"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Robert Hernandez","raw_affiliation_strings":["AstraZeneca plc, Cambridge, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"AstraZeneca plc, Cambridge, UK","institution_ids":["https://openalex.org/I105036370"]}]},{"author_position":"last","author":{"id":null,"display_name":"Goran Nenadic","orcid":null},"institutions":[{"id":"https://openalex.org/I28407311","display_name":"University of Manchester","ror":"https://ror.org/027m9bs27","country_code":"GB","type":"education","lineage":["https://openalex.org/I28407311"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Goran Nenadic","raw_affiliation_strings":["School of Computer Science, University of Manchester, Manchester, M13 9PL, UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science, University of Manchester, Manchester, M13 9PL, UK","institution_ids":["https://openalex.org/I28407311"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I28407311"],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":{"value":2390,"currency":"EUR","value_usd":2990},"fwci":2.3078,"has_fulltext":true,"cited_by_count":49,"citation_normalized_percentile":{"value":0.88948419,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"22","issue":"1","first_page":"55","last_page":"78"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.41830000281333923,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.41830000281333923,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.24619999527931213,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.16290000081062317,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.7268999814987183},{"id":"https://openalex.org/keywords/table","display_name":"Table (database)","score":0.7063999772071838},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5871999859809875},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.5335999727249146},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5306000113487244},{"id":"https://openalex.org/keywords/biomedical-text-mining","display_name":"Biomedical text mining","score":0.41119998693466187},{"id":"https://openalex.org/keywords/relationship-extraction","display_name":"Relationship extraction","score":0.3806000053882599},{"id":"https://openalex.org/keywords/knowledge-extraction","display_name":"Knowledge extraction","score":0.36980000138282776}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7617999911308289},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.7268999814987183},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.7063999772071838},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6252999901771545},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5871999859809875},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.5335999727249146},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5306000113487244},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.487199991941452},{"id":"https://openalex.org/C165141518","wikidata":"https://www.wikidata.org/wiki/Q4915126","display_name":"Biomedical text mining","level":3,"score":0.41119998693466187},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.39399999380111694},{"id":"https://openalex.org/C153604712","wikidata":"https://www.wikidata.org/wiki/Q7310755","display_name":"Relationship extraction","level":3,"score":0.3806000053882599},{"id":"https://openalex.org/C120567893","wikidata":"https://www.wikidata.org/wiki/Q1582085","display_name":"Knowledge extraction","level":2,"score":0.36980000138282776},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3684999942779541},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.3650999963283539},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3569999933242798},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3555999994277954},{"id":"https://openalex.org/C180198813","wikidata":"https://www.wikidata.org/wiki/Q121182","display_name":"Information system","level":2,"score":0.31929999589920044},{"id":"https://openalex.org/C2777466982","wikidata":"https://www.wikidata.org/wiki/Q5227287","display_name":"Data extraction","level":3,"score":0.31119999289512634},{"id":"https://openalex.org/C172967692","wikidata":"https://www.wikidata.org/wiki/Q747762","display_name":"Decision table","level":3,"score":0.3003000020980835},{"id":"https://openalex.org/C71472368","wikidata":"https://www.wikidata.org/wiki/Q676880","display_name":"Text mining","level":2,"score":0.29260000586509705},{"id":"https://openalex.org/C2781083858","wikidata":"https://www.wikidata.org/wiki/Q17327049","display_name":"Scientific literature","level":2,"score":0.2913999855518341},{"id":"https://openalex.org/C33326189","wikidata":"https://www.wikidata.org/wiki/Q17092450","display_name":"Information integration","level":2,"score":0.2912999987602234},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2671000063419342},{"id":"https://openalex.org/C87868495","wikidata":"https://www.wikidata.org/wiki/Q750843","display_name":"Information processing","level":2,"score":0.26510000228881836},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.26350000500679016},{"id":"https://openalex.org/C68476402","wikidata":"https://www.wikidata.org/wiki/Q1456936","display_name":"Table of contents","level":2,"score":0.260699987411499}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1007/s10032-019-00317-0","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10032-019-00317-0","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10032-019-00317-0.pdf","source":{"id":"https://openalex.org/S90108747","display_name":"International Journal on Document Analysis and Recognition (IJDAR)","issn_l":"1433-2825","issn":["1433-2825","1433-2833"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal on Document Analysis and Recognition (IJDAR)","raw_type":"journal-article"},{"id":"pmh:oai:pure.atira.dk:openaire/3bc6e439-c54e-44d5-8e64-5f3216f623d1","is_oa":true,"landing_page_url":"https://research.manchester.ac.uk/en/publications/3bc6e439-c54e-44d5-8e64-5f3216f623d1","pdf_url":null,"source":{"id":"https://openalex.org/S4306400662","display_name":"Research Explorer (The University of Manchester)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I28407311","host_organization_name":"University of Manchester","host_organization_lineage":["https://openalex.org/I28407311"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Milo\u0161evi\u0107, N, Gregson, C, Hernandez, R & Nenadic, G 2019, 'A framework for information extraction from tables in biomedical literature', International Journal of Document Analysis and Recognition, vol. 22, no. 1, pp. 55-78. https://doi.org/10.1007/s10032-019-00317-0","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:arXiv.org:1902.10031","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1902.10031","pdf_url":"https://arxiv.org/pdf/1902.10031","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"doi:10.1007/s10032-019-00317-0","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s10032-019-00317-0","pdf_url":"https://link.springer.com/content/pdf/10.1007/s10032-019-00317-0.pdf","source":{"id":"https://openalex.org/S90108747","display_name":"International Journal on Document Analysis and Recognition (IJDAR)","issn_l":"1433-2825","issn":["1433-2825","1433-2833"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal on Document Analysis and Recognition (IJDAR)","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320307770","display_name":"AstraZeneca","ror":"https://ror.org/04r9x1a08"},{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2913991821.pdf","grobid_xml":"https://content.openalex.org/works/W2913991821.grobid-xml"},"referenced_works_count":34,"referenced_works":["https://openalex.org/W92812941","https://openalex.org/W102708294","https://openalex.org/W1562673218","https://openalex.org/W1971703763","https://openalex.org/W1988217119","https://openalex.org/W1995296910","https://openalex.org/W2001793573","https://openalex.org/W2020651570","https://openalex.org/W2024791376","https://openalex.org/W2029017735","https://openalex.org/W2042448356","https://openalex.org/W2072944755","https://openalex.org/W2073243352","https://openalex.org/W2076063813","https://openalex.org/W2078206655","https://openalex.org/W2081580037","https://openalex.org/W2086677639","https://openalex.org/W2102189859","https://openalex.org/W2102672399","https://openalex.org/W2118978333","https://openalex.org/W2133990480","https://openalex.org/W2134164043","https://openalex.org/W2134708958","https://openalex.org/W2141756865","https://openalex.org/W2155361294","https://openalex.org/W2159583324","https://openalex.org/W2166703617","https://openalex.org/W2187250581","https://openalex.org/W2250539671","https://openalex.org/W2420216091","https://openalex.org/W2471014220","https://openalex.org/W2919115771","https://openalex.org/W3160877400","https://openalex.org/W4230341144"],"related_works":[],"abstract_inverted_index":{"The":[0,43],"scientific":[1],"literature":[2],"is":[3,71],"growing":[4],"exponentially,":[5],"and":[6,29,41,64,84,101,135,139,150,157],"professionals":[7],"are":[8],"no":[9],"more":[10],"able":[11],"to":[12,27],"cope":[13],"with":[14,144],"the":[15,24,73,92,145,154],"current":[16],"amount":[17],"of":[18,36,66,79],"publications.":[19],"Text":[20],"mining":[21,47,58],"provided":[22],"in":[23,46,91,110],"past":[25],"methods":[26,74],"retrieve":[28],"extract":[30],"information":[31,88,106],"from":[32,89,108],"text;":[33],"however,":[34],"most":[35],"these":[37],"approaches":[38],"ignored":[39],"tables":[40,90,109],"figures.":[42],"research":[44,70],"done":[45],"table":[48,118],"data":[49],"still":[50],"does":[51],"not":[52],"have":[53],"an":[54,102],"integrated":[55],"approach":[56,142],"for":[57,75,105],"that":[59,113],"would":[60],"consider":[61],"all":[62],"complexities":[63],"challenges":[65],"a":[67,97],"table.":[68],"Our":[69,141],"examining":[72],"extracting":[76],"numerical":[77],"(number":[78],"patients,":[80],"age,":[81],"gender":[82],"distribution)":[83],"textual":[85],"(adverse":[86],"reactions)":[87],"clinical":[93,111],"literature.":[94],"We":[95],"present":[96],"requirement":[98],"analysis":[99],"template":[100],"integral":[103],"methodology":[104],"extraction":[107],"domain":[112],"contains":[114],"7":[115],"steps:":[116],"(1)":[117],"detection,":[119],"(2)":[120],"functional":[121],"processing,":[122,125,131],"(3)":[123],"structural":[124],"(4)":[126],"semantic":[127],"tagging,":[128],"(5)":[129],"pragmatic":[130],"(6)":[132],"cell":[133],"selection":[134],"(7)":[136],"syntactic":[137],"processing":[138],"extraction.":[140],"performed":[143],"F-measure":[146],"ranged":[147],"between":[148],"82":[149],"92%,":[151],"depending":[152],"on":[153],"variable,":[155],"task":[156],"its":[158],"complexity.":[159]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":11},{"year":2022,"cited_by_count":8},{"year":2021,"cited_by_count":13},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":1}],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2019-02-21T00:00:00"}
