{"id":"https://openalex.org/W4406495890","doi":"https://doi.org/10.1109/bigdata62323.2024.10825795","title":"Machine Learning (ML) Classifier to Assist Metadata Creation","display_name":"Machine Learning (ML) Classifier to Assist Metadata Creation","publication_year":2024,"publication_date":"2024-12-15","ids":{"openalex":"https://openalex.org/W4406495890","doi":"https://doi.org/10.1109/bigdata62323.2024.10825795"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata62323.2024.10825795","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10825795","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"www.osti.gov/servlets/purl/2538265","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5055701024","display_name":"Hannah Collier","orcid":"https://orcid.org/0000-0001-5592-8023"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Hannah Collier","raw_affiliation_strings":["Oak Ridge National Laboratory,Environmental Science Division,Oak Ridge,USA"],"affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory,Environmental Science Division,Oak Ridge,USA","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013650256","display_name":"Eric Enright","orcid":null},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Eric Enright","raw_affiliation_strings":["Oak Ridge National Laboratory,Environmental Science Division,Oak Ridge,USA"],"affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory,Environmental Science Division,Oak Ridge,USA","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069946225","display_name":"Santonu Goswami","orcid":"https://orcid.org/0000-0002-0294-0210"},"institutions":[{"id":"https://openalex.org/I148283060","display_name":"Lawrence Berkeley National Laboratory","ror":"https://ror.org/02jbv0t02","country_code":"US","type":"facility","lineage":["https://openalex.org/I1330989302","https://openalex.org/I148283060","https://openalex.org/I39565521"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sujata Goswami","raw_affiliation_strings":["Lawrence Berkeley National Laboratory,Advanced Light Source,Berkeley,USA"],"affiliations":[{"raw_affiliation_string":"Lawrence Berkeley National Laboratory,Advanced Light Source,Berkeley,USA","institution_ids":["https://openalex.org/I148283060"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101838993","display_name":"Chirag Shah","orcid":"https://orcid.org/0000-0003-1305-8431"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chirag Shah","raw_affiliation_strings":["Oak Ridge National Laboratory,Environmental Science Division,Oak Ridge,USA"],"affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory,Environmental Science Division,Oak Ridge,USA","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012782950","display_name":"Maggie Davis","orcid":"https://orcid.org/0000-0001-8131-9328"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Maggie Davis","raw_affiliation_strings":["Oak Ridge National Laboratory,Buildings and Transportation Science Div,Oak Ridge,USA"],"affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory,Buildings and Transportation Science Div,Oak Ridge,USA","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5048211300","display_name":"Rachael N. Isphording","orcid":"https://orcid.org/0000-0003-4451-6204"},"institutions":[{"id":"https://openalex.org/I4210120769","display_name":"ARC Centre of Excellence for Climate System Science","ror":"https://ror.org/0288wcg94","country_code":"AU","type":"facility","lineage":["https://openalex.org/I1337719021","https://openalex.org/I2801453606","https://openalex.org/I31746571","https://openalex.org/I4210120769","https://openalex.org/I4210132349"]},{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Rachael Isphording","raw_affiliation_strings":["University of New South Wales,Climate Change Research Centre, ARC Centre of Excellence for Climate Extremes,Sydney,Australia"],"affiliations":[{"raw_affiliation_string":"University of New South Wales,Climate Change Research Centre, ARC Centre of Excellence for Climate Extremes,Sydney,Australia","institution_ids":["https://openalex.org/I4210120769","https://openalex.org/I31746571"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5055701024"],"corresponding_institution_ids":["https://openalex.org/I1289243028"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.41345578,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"2072","last_page":"2079"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11937","display_name":"Research Data Management Practices","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11937","display_name":"Research Data Management Practices","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9925000071525574,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9692000150680542,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.8066316843032837},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7743953466415405},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.5689839720726013},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.48647090792655945},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4378988742828369},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.21583539247512817}],"concepts":[{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.8066316843032837},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7743953466415405},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.5689839720726013},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48647090792655945},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4378988742828369},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.21583539247512817}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/bigdata62323.2024.10825795","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata62323.2024.10825795","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Big Data (BigData)","raw_type":"proceedings-article"},{"id":"pmh:oai:osti.gov:2538265","is_oa":true,"landing_page_url":"www.osti.gov/servlets/purl/2538265","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null}],"best_oa_location":{"id":"pmh:oai:osti.gov:2538265","is_oa":true,"landing_page_url":"www.osti.gov/servlets/purl/2538265","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":null},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320332359","display_name":"Office of Science","ror":"https://ror.org/00mmn6b08"},{"id":"https://openalex.org/F4320337509","display_name":"Biological and Environmental Research","ror":"https://ror.org/0114b2m14"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W1618905105","https://openalex.org/W2101234009","https://openalex.org/W2302501749","https://openalex.org/W2889208108","https://openalex.org/W3095395190","https://openalex.org/W3129401410","https://openalex.org/W3209329320","https://openalex.org/W4210363350","https://openalex.org/W4225132456","https://openalex.org/W4281606156","https://openalex.org/W4327774590","https://openalex.org/W4387166054","https://openalex.org/W6636501900"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4306674287","https://openalex.org/W4387369504","https://openalex.org/W3046775127","https://openalex.org/W4394896187","https://openalex.org/W3170094116","https://openalex.org/W4386462264","https://openalex.org/W3107602296","https://openalex.org/W4364306694","https://openalex.org/W4312192474"],"abstract_inverted_index":{"The":[0,48],"Atmospheric":[1],"Radiation":[2],"Measurement":[3],"(ARM)":[4],"Data":[5],"Center":[6],"is":[7,32,153],"responsible":[8],"for":[9,34,56,116,127,155,173],"the":[10,100,105,108,113,120,135,144],"timely":[11],"collection,":[12],"archival,":[13],"and":[14,39,61,102,119,168,192],"curation":[15],"of":[16,44,72,80,104,122],"science":[17,46],"data":[18,28,163],"products.":[19],"These":[20,86],"products":[21],"are":[22],"freely":[23],"available":[24],"through":[25],"an":[26,123],"online":[27],"repository.":[29],"Metadata":[30],"creation":[31,121],"paramount":[33],"scientific":[35],"users":[36,53],"to":[37,54,67,76,111,165],"find":[38],"access":[40],"over":[41],"seven":[42],"petabytes":[43],"atmospheric":[45],"data.":[47],"hierarchical":[49],"metadata":[50,75,91,125,184,195],"structure":[51],"allows":[52],"search":[55],"information":[57],"at":[58],"both":[59],"broad":[60],"narrow":[62],"levels.":[63],"This":[64,97,178],"project":[65],"aims":[66],"leverage":[68],"30":[69],"years\u2019":[70],"worth":[71],"manually":[73],"created":[74],"enable":[77],"machine":[78],"predictions":[79,88],"broad-term":[81],"classifications":[82],"from":[83],"narrow-term":[84],"descriptions.":[85],"classification":[87,158],"would":[89],"assist":[90],"coordinators":[92],"with":[93,143],"their":[94],"term":[95],"selections.":[96],"paper":[98],"discusses":[99],"cleaning":[101],"preprocessing":[103],"training":[106,162],"data,":[107],"pipeline":[109],"developed":[110],"determine":[112],"best":[114],"model":[115],"this":[117,174],"task,":[118],"API":[124],"classifier":[126,180],"ARM":[128],"measurement":[129],"metadata.":[130],"Our":[131],"results":[132],"show":[133],"that":[134],"Linear":[136],"Support":[137],"Vector":[138],"Classification":[139],"(LinearSVC)":[140],"algorithm,":[141],"along":[142],"Term":[145],"Frequency":[146,150],"\u2013":[147],"Inverse":[148],"Document":[149],"(TF-IDF)":[151],"vectorizer,":[152],"well-suited":[154],"our":[156],"multi-class":[157],"task.":[159],"Lengthier":[160],"input":[161],"led":[164],"better":[166],"results,":[167],"artificial":[169],"balancing":[170],"was":[171],"unnecessary":[172],"particular":[175],"use":[176],"case.":[177],"predictive":[179],"enhances":[181],"efficiency":[182],"in":[183,194],"creation,":[185],"as":[186,188],"well":[187],"supports":[189],"greater":[190],"consistency":[191],"accuracy":[193],"tagging.":[196]},"counts_by_year":[],"updated_date":"2026-03-17T09:09:15.849793","created_date":"2025-10-10T00:00:00"}
