{"id":"https://openalex.org/W7138090673","doi":"https://doi.org/10.48550/arxiv.2603.13338","title":"OpenExtract: Automated Data Extraction for Systematic Reviews in Health","display_name":"OpenExtract: Automated Data Extraction for Systematic Reviews in Health","publication_year":2026,"publication_date":"2026-03-06","ids":{"openalex":"https://openalex.org/W7138090673","doi":"https://doi.org/10.48550/arxiv.2603.13338"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.13338","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13338","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.13338","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5094298497","display_name":"Jim Achterberg","orcid":"https://orcid.org/0009-0000-9589-7831"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Achterberg, Jim","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013443186","display_name":"Bram van Dijk","orcid":"https://orcid.org/0000-0002-6330-6934"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Van Dijk, Bram","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014308617","display_name":"Jing Meng","orcid":"https://orcid.org/0000-0001-8708-0485"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Meng, Jing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129738307","display_name":"Saif Ul Islam","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Islam, Saif Ul","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043277300","display_name":"Gregory Epiphaniou","orcid":"https://orcid.org/0000-0003-1054-6368"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Epiphaniou, Gregory","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129709905","display_name":"Carsten Maple","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Maple, Carsten","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129650234","display_name":"Xuefei Ding","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ding, Xuefei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026657286","display_name":"Theodoros N. Arvanitis","orcid":"https://orcid.org/0000-0001-5473-135X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Arvanitis, Theodoros N.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057660363","display_name":"Simon Brouwer","orcid":"https://orcid.org/0000-0002-0956-0851"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Brouwer, Simon","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072790276","display_name":"M. Haas","orcid":"https://orcid.org/0000-0003-2581-8370"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Haas, Marcel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5033880574","display_name":"Marco Spruit","orcid":"https://orcid.org/0000-0002-9237-221X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Spruit, Marco","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":11,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.25870001316070557,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.25870001316070557,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10206","display_name":"Meta-analysis and systematic reviews","score":0.25279998779296875,"subfield":{"id":"https://openalex.org/subfields/1804","display_name":"Statistics, Probability and Uncertainty"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.07599999755620956,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.7531999945640564},{"id":"https://openalex.org/keywords/systematic-review","display_name":"Systematic review","score":0.6456999778747559},{"id":"https://openalex.org/keywords/data-extraction","display_name":"Data extraction","score":0.6294000148773193},{"id":"https://openalex.org/keywords/precision-and-recall","display_name":"Precision and recall","score":0.46950000524520874},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.3522000014781952},{"id":"https://openalex.org/keywords/recall","display_name":"Recall","score":0.3458999991416931},{"id":"https://openalex.org/keywords/test-data","display_name":"Test data","score":0.33149999380111694},{"id":"https://openalex.org/keywords/health-data","display_name":"Health data","score":0.31360000371932983}],"concepts":[{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.7531999945640564},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7161999940872192},{"id":"https://openalex.org/C189708586","wikidata":"https://www.wikidata.org/wiki/Q1504425","display_name":"Systematic review","level":3,"score":0.6456999778747559},{"id":"https://openalex.org/C2777466982","wikidata":"https://www.wikidata.org/wiki/Q5227287","display_name":"Data extraction","level":3,"score":0.6294000148773193},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.6057999730110168},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.46950000524520874},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.44510000944137573},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4077000021934509},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3635999858379364},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.3522000014781952},{"id":"https://openalex.org/C100660578","wikidata":"https://www.wikidata.org/wiki/Q18733","display_name":"Recall","level":2,"score":0.3458999991416931},{"id":"https://openalex.org/C16910744","wikidata":"https://www.wikidata.org/wiki/Q7705759","display_name":"Test data","level":2,"score":0.33149999380111694},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3181000053882599},{"id":"https://openalex.org/C3017977704","wikidata":"https://www.wikidata.org/wiki/Q18745135","display_name":"Health data","level":3,"score":0.31360000371932983},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.29670000076293945},{"id":"https://openalex.org/C2778864079","wikidata":"https://www.wikidata.org/wiki/Q173285","display_name":"Digital data","level":3,"score":0.29490000009536743},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.2937999963760376},{"id":"https://openalex.org/C3020493868","wikidata":"https://www.wikidata.org/wiki/Q55631277","display_name":"Real world data","level":2,"score":0.2865999937057495},{"id":"https://openalex.org/C2781170535","wikidata":"https://www.wikidata.org/wiki/Q30587856","display_name":"Noisy data","level":2,"score":0.28630000352859497},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2833000123500824},{"id":"https://openalex.org/C55037315","wikidata":"https://www.wikidata.org/wiki/Q5421151","display_name":"Experimental data","level":2,"score":0.27559998631477356},{"id":"https://openalex.org/C2780433410","wikidata":"https://www.wikidata.org/wiki/Q5276090","display_name":"Digital health","level":3,"score":0.2639000117778778},{"id":"https://openalex.org/C2781083858","wikidata":"https://www.wikidata.org/wiki/Q17327049","display_name":"Scientific literature","level":2,"score":0.2590000033378601}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.13338","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13338","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.13338","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13338","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"This":[0],"study":[1],"presents":[2],"OpenExtract,":[3,39],"an":[4],"open-source":[5],"pipeline":[6,17],"for":[7],"automated":[8],"data":[9,25,80],"extraction":[10],"in":[11,48,69],"large-scale":[12],"systematic":[13,45],"literature":[14,46],"reviews.":[15],"The":[16],"queries":[18],"large":[19],"language":[20],"models":[21],"(LLMs)":[22],"to":[23,43],"predict":[24],"entries":[26],"based":[27],"on":[28],"relevant":[29],"sections":[30],"of":[31,38,57,66],"scientific":[32],"articles.":[33],"To":[34],"test":[35],"the":[36],"efficacy":[37],"we":[40],"apply":[41],"it":[42,74],"a":[44],"review":[47],"digital":[49],"health":[50],"and":[51,63,82],"compare":[52],"its":[53],"outputs":[54],"with":[55],"those":[56],"human":[58],"researchers.":[59],"OpenExtract":[60],"achieves":[61],"precision":[62],"recall":[64],"scores":[65],"&gt;":[67],"0.8":[68],"this":[70],"task,":[71],"indicating":[72],"that":[73],"can":[75],"be":[76],"effective":[77],"at":[78],"extracting":[79],"automatically":[81],"efficiently.":[83],"OpenExtract:":[84],"https://github.com/JimAchterbergLUMC/OpenExtract.":[85]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-18T00:00:00"}
