{"id":"https://openalex.org/W4406458796","doi":"https://doi.org/10.1109/bigdata62323.2024.10825206","title":"AI-Ready Data: Knowledge Extraction from Archival Lab Notebooks","display_name":"AI-Ready Data: Knowledge Extraction from Archival Lab Notebooks","publication_year":2024,"publication_date":"2024-12-15","ids":{"openalex":"https://openalex.org/W4406458796","doi":"https://doi.org/10.1109/bigdata62323.2024.10825206"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata62323.2024.10825206","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10825206","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5045632528","display_name":"Joel Pepper","orcid":"https://orcid.org/0000-0002-1601-8729"},"institutions":[{"id":"https://openalex.org/I72816309","display_name":"Drexel University","ror":"https://ror.org/04bdffz58","country_code":"US","type":"education","lineage":["https://openalex.org/I72816309"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Joel Pepper","raw_affiliation_strings":["Drexel University,Department of Computer Science,Philadelphia,PA,USA"],"affiliations":[{"raw_affiliation_string":"Drexel University,Department of Computer Science,Philadelphia,PA,USA","institution_ids":["https://openalex.org/I72816309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018951212","display_name":"Elizabeth A. Jones","orcid":"https://orcid.org/0000-0003-4667-3238"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Elizabeth Jones","raw_affiliation_strings":["Northeastern University,Department of Computer Science,Boston,MA,USA"],"affiliations":[{"raw_affiliation_string":"Northeastern University,Department of Computer Science,Boston,MA,USA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012758615","display_name":"Xintong Zhao","orcid":"https://orcid.org/0000-0003-1867-804X"},"institutions":[{"id":"https://openalex.org/I72816309","display_name":"Drexel University","ror":"https://ror.org/04bdffz58","country_code":"US","type":"education","lineage":["https://openalex.org/I72816309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xintong Zhao","raw_affiliation_strings":["Drexel University,Department of Information Science,Philadelphia,PA,USA"],"affiliations":[{"raw_affiliation_string":"Drexel University,Department of Information Science,Philadelphia,PA,USA","institution_ids":["https://openalex.org/I72816309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001027068","display_name":"Jacob Furst","orcid":null},"institutions":[{"id":"https://openalex.org/I106165777","display_name":"University of Central Florida","ror":"https://ror.org/036nfer12","country_code":"US","type":"education","lineage":["https://openalex.org/I106165777"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jacob Furst","raw_affiliation_strings":["University of Central Florida,Department of Chemistry,Orlando,FL,USA"],"affiliations":[{"raw_affiliation_string":"University of Central Florida,Department of Chemistry,Orlando,FL,USA","institution_ids":["https://openalex.org/I106165777"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020983477","display_name":"Kyle Langlois","orcid":null},"institutions":[{"id":"https://openalex.org/I106165777","display_name":"University of Central Florida","ror":"https://ror.org/036nfer12","country_code":"US","type":"education","lineage":["https://openalex.org/I106165777"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kyle Langlois","raw_affiliation_strings":["University of Central Florida,Department of Chemistry,Orlando,FL,USA"],"affiliations":[{"raw_affiliation_string":"University of Central Florida,Department of Chemistry,Orlando,FL,USA","institution_ids":["https://openalex.org/I106165777"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020320447","display_name":"Fernando J. Uribe\u2010Romo","orcid":"https://orcid.org/0000-0003-0212-0295"},"institutions":[{"id":"https://openalex.org/I106165777","display_name":"University of Central Florida","ror":"https://ror.org/036nfer12","country_code":"US","type":"education","lineage":["https://openalex.org/I106165777"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Fernando Uribe-Romo","raw_affiliation_strings":["University of Central Florida,Department of Chemistry,Orlando,FL,USA"],"affiliations":[{"raw_affiliation_string":"University of Central Florida,Department of Chemistry,Orlando,FL,USA","institution_ids":["https://openalex.org/I106165777"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061644705","display_name":"David E. Breen","orcid":"https://orcid.org/0000-0002-1376-5008"},"institutions":[{"id":"https://openalex.org/I72816309","display_name":"Drexel University","ror":"https://ror.org/04bdffz58","country_code":"US","type":"education","lineage":["https://openalex.org/I72816309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David Breen","raw_affiliation_strings":["Drexel University,Department of Computer Science,Philadelphia,PA,USA"],"affiliations":[{"raw_affiliation_string":"Drexel University,Department of Computer Science,Philadelphia,PA,USA","institution_ids":["https://openalex.org/I72816309"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5035913440","display_name":"Jane Greenberg","orcid":"https://orcid.org/0000-0001-7819-5360"},"institutions":[{"id":"https://openalex.org/I72816309","display_name":"Drexel University","ror":"https://ror.org/04bdffz58","country_code":"US","type":"education","lineage":["https://openalex.org/I72816309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jane Greenberg","raw_affiliation_strings":["Drexel University,Department of Information Science,Philadelphia,PA,USA"],"affiliations":[{"raw_affiliation_string":"Drexel University,Department of Information Science,Philadelphia,PA,USA","institution_ids":["https://openalex.org/I72816309"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5045632528"],"corresponding_institution_ids":["https://openalex.org/I72816309"],"apc_list":null,"apc_paid":null,"fwci":0.52,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.68748246,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"2489","last_page":"2495"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11937","display_name":"Research Data Management Practices","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9757000207901001,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6756532192230225},{"id":"https://openalex.org/keywords/extraction","display_name":"Extraction (chemistry)","score":0.4861557185649872},{"id":"https://openalex.org/keywords/knowledge-extraction","display_name":"Knowledge extraction","score":0.4292118549346924},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.37723591923713684},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.3263154625892639},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.25342267751693726}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6756532192230225},{"id":"https://openalex.org/C4725764","wikidata":"https://www.wikidata.org/wiki/Q844704","display_name":"Extraction (chemistry)","level":2,"score":0.4861557185649872},{"id":"https://openalex.org/C120567893","wikidata":"https://www.wikidata.org/wiki/Q1582085","display_name":"Knowledge extraction","level":2,"score":0.4292118549346924},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.37723591923713684},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3263154625892639},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.25342267751693726},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata62323.2024.10825206","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10825206","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W2053091815","https://openalex.org/W2158625887","https://openalex.org/W2274095211","https://openalex.org/W2489559760","https://openalex.org/W2794631061","https://openalex.org/W2912055463","https://openalex.org/W2984990599","https://openalex.org/W3011732786","https://openalex.org/W3033696290","https://openalex.org/W4205228445","https://openalex.org/W4206759694","https://openalex.org/W4226145682","https://openalex.org/W4242673452","https://openalex.org/W4247827174","https://openalex.org/W4362597616","https://openalex.org/W4400352977","https://openalex.org/W4400358811","https://openalex.org/W4400643083","https://openalex.org/W4400808969","https://openalex.org/W6734208355","https://openalex.org/W6771745410","https://openalex.org/W6779471954","https://openalex.org/W6782334769","https://openalex.org/W6791981150","https://openalex.org/W6802089953","https://openalex.org/W6810032434","https://openalex.org/W6861051723","https://openalex.org/W6870688838"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W93075631","https://openalex.org/W3005434123","https://openalex.org/W2599205313","https://openalex.org/W2076251662","https://openalex.org/W4231842067"],"abstract_inverted_index":{"Collections":[0],"of":[1,9,33,43,57,77,94,138,165,191],"analog":[2,78],"lab":[3,79,106,119,140,193],"notebooks":[4,80,120,141],"are":[5,39],"an":[6,55],"invaluable":[7],"source":[8],"data":[10,83,108],"about":[11],"research":[12,34,66,86,105,147],"conditions,":[13],"steps,":[14],"and":[15,17,31,54,59,99,121,128,144,171,195],"outcomes,":[16],"in":[18,202],"aggregate":[19],"have":[20],"the":[21,28,47,92,100,135,139,163,169,173,189],"potential":[22],"to":[23,102,199],"provide":[24],"new":[25],"insights":[26],"into":[27,81,158],"successes,":[29],"failures":[30],"pedagogy":[32],"laboratories.":[35],"Unfortunately,":[36],"these":[37],"artifacts":[38],"increasingly":[40],"at":[41],"risk":[42],"being":[44,87],"lost":[45],"from":[46],"historical":[48],"scientific":[49,110,136,204],"record,":[50],"given":[51],"limited":[52],"archiving":[53],"absence":[56],"computational":[58,95],"AI":[60],"readiness.":[61],"This":[62],"paper":[63,113],"reports":[64],"on":[65,117],"addressing":[67],"this":[68],"challenge":[69],"by":[70,91],"testing":[71],"mechanisms":[72],"for":[73,109,186],"transforming":[74],"digital":[75],"scans":[76],"AI-ready":[82],"resources.":[84],"The":[85,112],"pursued":[88],"is":[89],"framed":[90],"field":[93],"archival":[96,118,192],"science":[97],"(CAS)":[98],"aim":[101],"utilize":[103],"analog,":[104],"notebook":[107],"study.":[111],"presents":[114],"background":[115],"context":[116],"CAS,":[122],"discusses":[123],"MOF":[124],"(metal":[125],"organic":[126,131],"frameworks)":[127,132],"COF":[129],"(covalent":[130],"synthesis":[133],"\u2013":[134],"domain":[137],"under":[142],"study,":[143],"details":[145],"our":[146],"methods.":[148],"We":[149],"demonstrate":[150],"a":[151,179,184],"promising":[152],"approach":[153],"that":[154],"automatically":[155],"segments":[156],"pages":[157],"discrete":[159],"entry":[160],"types,":[161],"extracts":[162],"contents":[164,198],"those":[166],"entries,":[167],"refines":[168],"output":[170],"assesses":[172],"automated":[174],"results.":[175],"These":[176],"efforts":[177],"represent":[178],"first":[180],"step":[181],"towards":[182],"developing":[183],"framework":[185],"both":[187],"improving":[188],"usability":[190],"notebooks,":[194],"enabling":[196],"their":[197],"be":[200],"used":[201],"subsequent":[203],"inquiry.":[205]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-03-09T08:58:05.943551","created_date":"2025-10-10T00:00:00"}
