{"id":"https://openalex.org/W4309185501","doi":"https://doi.org/10.1109/escience55777.2022.00088","title":"Automated metadata extraction: challenges and opportunities","display_name":"Automated metadata extraction: challenges and opportunities","publication_year":2022,"publication_date":"2022-10-01","ids":{"openalex":"https://openalex.org/W4309185501","doi":"https://doi.org/10.1109/escience55777.2022.00088"},"language":"en","primary_location":{"id":"doi:10.1109/escience55777.2022.00088","is_oa":false,"landing_page_url":"https://doi.org/10.1109/escience55777.2022.00088","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE 18th International Conference on e-Science (e-Science)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://www.osti.gov/servlets/purl/1897834","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5079278437","display_name":"Tyler J. Skluzacek","orcid":"https://orcid.org/0000-0003-2242-4931"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Tyler J. Skluzacek","raw_affiliation_strings":["Data Lifecycle and Scalable Workflows Group,Oak Ridge National Laboratory"],"affiliations":[{"raw_affiliation_string":"Data Lifecycle and Scalable Workflows Group,Oak Ridge National Laboratory","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065464552","display_name":"Kyle Chard","orcid":"https://orcid.org/0000-0002-7370-4805"},"institutions":[{"id":"https://openalex.org/I1282105669","display_name":"Argonne National Laboratory","ror":"https://ror.org/05gvnxz63","country_code":"US","type":"facility","lineage":["https://openalex.org/I1282105669","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kyle Chard","raw_affiliation_strings":["University of Chicago,Data Science and Learning Division, Argonne National Laboratory,Department of Computer Science","Argonne National Laboratory (ANL)"],"affiliations":[{"raw_affiliation_string":"University of Chicago,Data Science and Learning Division, Argonne National Laboratory,Department of Computer Science","institution_ids":["https://openalex.org/I1282105669"]},{"raw_affiliation_string":"Argonne National Laboratory (ANL)","institution_ids":["https://openalex.org/I1282105669"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032231503","display_name":"Ian Foster","orcid":"https://orcid.org/0000-0003-2129-5269"},"institutions":[{"id":"https://openalex.org/I1282105669","display_name":"Argonne National Laboratory","ror":"https://ror.org/05gvnxz63","country_code":"US","type":"facility","lineage":["https://openalex.org/I1282105669","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I40347166"]},{"id":"https://openalex.org/I40347166","display_name":"University of Chicago","ror":"https://ror.org/024mw5h28","country_code":"US","type":"education","lineage":["https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ian Foster","raw_affiliation_strings":["University of Chicago,Data Science and Learning Division, Argonne National Laboratory,Department of Computer Science","University of Chicago"],"affiliations":[{"raw_affiliation_string":"University of Chicago,Data Science and Learning Division, Argonne National Laboratory,Department of Computer Science","institution_ids":["https://openalex.org/I1282105669"]},{"raw_affiliation_string":"University of Chicago","institution_ids":["https://openalex.org/I40347166"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5079278437"],"corresponding_institution_ids":["https://openalex.org/I1289243028"],"apc_list":null,"apc_paid":null,"fwci":0.5552,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.72760911,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"495","last_page":"500"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9617999792098999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9617999792098999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.8937765955924988},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.597318172454834},{"id":"https://openalex.org/keywords/extraction","display_name":"Extraction (chemistry)","score":0.514029860496521},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4623244106769562},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.43880727887153625},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.41068658232688904},{"id":"https://openalex.org/keywords/chromatography","display_name":"Chromatography","score":0.10503619909286499},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.0873694121837616}],"concepts":[{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.8937765955924988},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.597318172454834},{"id":"https://openalex.org/C4725764","wikidata":"https://www.wikidata.org/wiki/Q844704","display_name":"Extraction (chemistry)","level":2,"score":0.514029860496521},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4623244106769562},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.43880727887153625},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.41068658232688904},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.10503619909286499},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0873694121837616}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/escience55777.2022.00088","is_oa":false,"landing_page_url":"https://doi.org/10.1109/escience55777.2022.00088","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE 18th International Conference on e-Science (e-Science)","raw_type":"proceedings-article"},{"id":"pmh:oai:osti.gov:1897834","is_oa":true,"landing_page_url":"https://www.osti.gov/biblio/1897834","pdf_url":"https://www.osti.gov/servlets/purl/1897834","source":{"id":"https://openalex.org/S4306402487","display_name":"OSTI OAI (U.S. Department of Energy Office of Scientific and Technical Information)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I139351228","host_organization_name":"Office of Scientific and Technical Information","host_organization_lineage":["https://openalex.org/I139351228"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null},{"id":"pmh:oai:zenodo.org:7182583","is_oa":true,"landing_page_url":"https://zenodo.org/record/7182583","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/lecture"},{"id":"doi:10.5281/zenodo.7182583","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.7182583","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"pmh:oai:osti.gov:1897834","is_oa":true,"landing_page_url":"https://www.osti.gov/biblio/1897834","pdf_url":"https://www.osti.gov/servlets/purl/1897834","source":{"id":"https://openalex.org/S4306402487","display_name":"OSTI OAI (U.S. Department of Energy Office of Scientific and Technical Information)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I139351228","host_organization_name":"Office of Scientific and Technical Information","host_organization_lineage":["https://openalex.org/I139351228"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8423070353","display_name":null,"funder_award_id":"DE-AC05-000R22725","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"}],"funders":[{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4309185501.pdf"},"referenced_works_count":22,"referenced_works":["https://openalex.org/W1995875735","https://openalex.org/W2054834905","https://openalex.org/W2078962046","https://openalex.org/W2141297584","https://openalex.org/W2152818382","https://openalex.org/W2167492558","https://openalex.org/W2171262729","https://openalex.org/W2583718094","https://openalex.org/W2794512438","https://openalex.org/W3037946710","https://openalex.org/W3092185942","https://openalex.org/W3173426902","https://openalex.org/W3185089636","https://openalex.org/W4200223120","https://openalex.org/W4285181584","https://openalex.org/W4288052974","https://openalex.org/W6630822101","https://openalex.org/W6678152476","https://openalex.org/W6732452405","https://openalex.org/W6748698590","https://openalex.org/W6763188198","https://openalex.org/W6788749851"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2392768766","https://openalex.org/W2058118494","https://openalex.org/W2382021449","https://openalex.org/W2095118173","https://openalex.org/W2104269053","https://openalex.org/W2106424170","https://openalex.org/W1985426483","https://openalex.org/W2501188010","https://openalex.org/W4299935056"],"abstract_inverted_index":{"Proper":[0],"application":[1],"of":[2,38,45,71,88,140,164,200,226],"the":[3,40,85,109,120,162,176,184,224,231],"FAIR":[4],"data":[5,12,17,26,46,141,228],"principles":[6],"is":[7,92,143],"what":[8,104],"separates":[9],"a":[10,24,36,61,137,144,212],"vibrant":[11],"ecosystem,":[13],"in":[14,161,187,215,235],"which":[15],"research":[16,194,227],"are":[18,107],"frequently":[19],"shared":[20],"and":[21,43,63,119,130,146,180],"reused,":[22],"from":[23],"lifeless":[25],"graveyard.":[27],"Automated":[28],"metadata":[29,58,91,105,113,177,218],"extraction":[30,55,135,159,168,178,206,219],"systems":[31,56,160,220],"have":[32,99],"been":[33],"proposed":[34],"as":[35,102],"means":[37],"bolstering":[39],"findability,":[41],"interoperability,":[42],"reusability":[44],"repositories":[47],"with":[48,94],"little":[49],"or":[50,78],"no":[51],"human":[52,129,232],"intervention.":[53],"These":[54],"mine":[57],"by":[59,158],"crawling":[60],"repository":[62],"applying":[64],"lightweight":[65],"extractors":[66],"that,":[67],"for":[68,111,123,136,170,203],"various":[69],"types":[70],"file":[72],"(e.g.,":[73],"image,":[74],"CSV":[75],"file),":[76],"extract":[77],"synthesize":[79],"relevant":[80],"attributes.":[81],"In":[82,150],"practice,":[83],"however,":[84],"automated":[86,205],"creation":[87],"generally":[89],"useful":[90],"fraught":[93],"challenges.":[95],"Data":[96],"consumers":[97,142],"may":[98],"different":[100],"perspectives":[101],"to":[103,115,183,196],"representations":[106],"useful,":[108],"standards":[110],"recording":[112],"tend":[114],"change":[116],"over":[117],"time,":[118],"software":[121],"model":[122],"processing":[124],"updates":[125],"can":[126,222],"introduce":[127],"unnecessary":[128],"computational":[131],"effort.":[132],"Thus,":[133],"generalizing":[134,188],"broad":[138],"audience":[139],"difficult":[145],"relatively":[147],"unsolved":[148],"problem.":[149],"this":[151,209],"work,":[152],"we":[153,191],"explore":[154],"these":[155,201],"challenges":[156,202],"faced":[157,186],"context":[163,182],"constructing":[165],"our":[166],"own":[167],"system":[169],"science":[171],"data.":[172],"We":[173],"first":[174,213],"define":[175],"problem":[179],"provide":[181],"issues":[185],"metadata.":[189],"Additionally,":[190],"identify":[192],"potential":[193],"directions":[195],"help":[197],"alleviate":[198],"many":[199],"all":[204],"systems.":[207],"Ultimately,":[208],"work":[210],"represents":[211],"step":[214],"designing":[216],"Ubiquitous":[217],"that":[221],"maximize":[223],"value":[225],"while":[229],"minimizing":[230],"efforts":[233],"required":[234],"doing":[236],"so.":[237]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
