{"id":"https://openalex.org/W4416772091","doi":"https://doi.org/10.3390/make7040152","title":"LLM-Based Pipeline for Structured Knowledge Extraction from Scientific Literature on Heavy Metal Hyperaccumulation","display_name":"LLM-Based Pipeline for Structured Knowledge Extraction from Scientific Literature on Heavy Metal Hyperaccumulation","publication_year":2025,"publication_date":"2025-11-25","ids":{"openalex":"https://openalex.org/W4416772091","doi":"https://doi.org/10.3390/make7040152"},"language":"en","primary_location":{"id":"doi:10.3390/make7040152","is_oa":true,"landing_page_url":"https://doi.org/10.3390/make7040152","pdf_url":"https://www.mdpi.com/2504-4990/7/4/152/pdf?version=1764080429","source":{"id":"https://openalex.org/S4210213891","display_name":"Machine Learning and Knowledge Extraction","issn_l":"2504-4990","issn":["2504-4990"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning and Knowledge Extraction","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2504-4990/7/4/152/pdf?version=1764080429","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056111322","display_name":"Kirill I. Makrinsky","orcid":null},"institutions":[{"id":"https://openalex.org/I4210085913","display_name":"Frumkin Institute of Physical Chemistry and Electrochemistry","ror":"https://ror.org/004dzkd51","country_code":"RU","type":"facility","lineage":["https://openalex.org/I1313323035","https://openalex.org/I4210085913","https://openalex.org/I4210145551"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Kiril Makrinsky","raw_affiliation_strings":["Frumkin Institute of Physical Chemistry and Electrochemistry, Russian Academy of Sciences, 31/4 Leninskiy pr., 119071 Moscow, Russia"],"affiliations":[{"raw_affiliation_string":"Frumkin Institute of Physical Chemistry and Electrochemistry, Russian Academy of Sciences, 31/4 Leninskiy pr., 119071 Moscow, Russia","institution_ids":["https://openalex.org/I4210085913"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013809123","display_name":"V. P. Shendrikov","orcid":"https://orcid.org/0000-0002-2708-4940"},"institutions":[{"id":"https://openalex.org/I4210085913","display_name":"Frumkin Institute of Physical Chemistry and Electrochemistry","ror":"https://ror.org/004dzkd51","country_code":"RU","type":"facility","lineage":["https://openalex.org/I1313323035","https://openalex.org/I4210085913","https://openalex.org/I4210145551"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Valery Shendrikov","raw_affiliation_strings":["Frumkin Institute of Physical Chemistry and Electrochemistry, Russian Academy of Sciences, 31/4 Leninskiy pr., 119071 Moscow, Russia"],"affiliations":[{"raw_affiliation_string":"Frumkin Institute of Physical Chemistry and Electrochemistry, Russian Academy of Sciences, 31/4 Leninskiy pr., 119071 Moscow, Russia","institution_ids":["https://openalex.org/I4210085913"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120388194","display_name":"Anna Makhonko","orcid":null},"institutions":[{"id":"https://openalex.org/I4210085913","display_name":"Frumkin Institute of Physical Chemistry and Electrochemistry","ror":"https://ror.org/004dzkd51","country_code":"RU","type":"facility","lineage":["https://openalex.org/I1313323035","https://openalex.org/I4210085913","https://openalex.org/I4210145551"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Anna Makhonko","raw_affiliation_strings":["Frumkin Institute of Physical Chemistry and Electrochemistry, Russian Academy of Sciences, 31/4 Leninskiy pr., 119071 Moscow, Russia"],"affiliations":[{"raw_affiliation_string":"Frumkin Institute of Physical Chemistry and Electrochemistry, Russian Academy of Sciences, 31/4 Leninskiy pr., 119071 Moscow, Russia","institution_ids":["https://openalex.org/I4210085913"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120388195","display_name":"Dmitry Merkushkin","orcid":null},"institutions":[{"id":"https://openalex.org/I4210085913","display_name":"Frumkin Institute of Physical Chemistry and Electrochemistry","ror":"https://ror.org/004dzkd51","country_code":"RU","type":"facility","lineage":["https://openalex.org/I1313323035","https://openalex.org/I4210085913","https://openalex.org/I4210145551"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Dmitry Merkushkin","raw_affiliation_strings":["Frumkin Institute of Physical Chemistry and Electrochemistry, Russian Academy of Sciences, 31/4 Leninskiy pr., 119071 Moscow, Russia"],"affiliations":[{"raw_affiliation_string":"Frumkin Institute of Physical Chemistry and Electrochemistry, Russian Academy of Sciences, 31/4 Leninskiy pr., 119071 Moscow, Russia","institution_ids":["https://openalex.org/I4210085913"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5068780715","display_name":"Oleg V. Batishchev","orcid":"https://orcid.org/0000-0002-9581-2233"},"institutions":[{"id":"https://openalex.org/I4210085913","display_name":"Frumkin Institute of Physical Chemistry and Electrochemistry","ror":"https://ror.org/004dzkd51","country_code":"RU","type":"facility","lineage":["https://openalex.org/I1313323035","https://openalex.org/I4210085913","https://openalex.org/I4210145551"]}],"countries":["RU"],"is_corresponding":true,"raw_author_name":"Oleg V. Batishchev","raw_affiliation_strings":["Frumkin Institute of Physical Chemistry and Electrochemistry, Russian Academy of Sciences, 31/4 Leninskiy pr., 119071 Moscow, Russia"],"affiliations":[{"raw_affiliation_string":"Frumkin Institute of Physical Chemistry and Electrochemistry, Russian Academy of Sciences, 31/4 Leninskiy pr., 119071 Moscow, Russia","institution_ids":["https://openalex.org/I4210085913"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5068780715"],"corresponding_institution_ids":["https://openalex.org/I4210085913"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.34097758,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"7","issue":"4","first_page":"152","last_page":"152"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.6601999998092651,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.6601999998092651,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11937","display_name":"Research Data Management Practices","score":0.03500000014901161,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10799","display_name":"Data Visualization and Analytics","score":0.019899999722838402,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.8388000130653381},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.8126000165939331},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5109999775886536},{"id":"https://openalex.org/keywords/knowledge-extraction","display_name":"Knowledge extraction","score":0.48190000653266907},{"id":"https://openalex.org/keywords/knowledge-base","display_name":"Knowledge base","score":0.47940000891685486},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.41100001335144043},{"id":"https://openalex.org/keywords/scientific-literature","display_name":"Scientific literature","score":0.33489999175071716}],"concepts":[{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.8388000130653381},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.8126000165939331},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6496999859809875},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.520799994468689},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5109999775886536},{"id":"https://openalex.org/C120567893","wikidata":"https://www.wikidata.org/wiki/Q1582085","display_name":"Knowledge extraction","level":2,"score":0.48190000653266907},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.47940000891685486},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.428600013256073},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.41100001335144043},{"id":"https://openalex.org/C2781083858","wikidata":"https://www.wikidata.org/wiki/Q17327049","display_name":"Scientific literature","level":2,"score":0.33489999175071716},{"id":"https://openalex.org/C115925183","wikidata":"https://www.wikidata.org/wiki/Q1412694","display_name":"Knowledge-based systems","level":2,"score":0.3122999966144562},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3075000047683716},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.30250000953674316},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.29120001196861267},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.28679999709129333},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.28519999980926514},{"id":"https://openalex.org/C4725764","wikidata":"https://www.wikidata.org/wiki/Q844704","display_name":"Extraction (chemistry)","level":2,"score":0.28459998965263367},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.2676999866962433},{"id":"https://openalex.org/C2777466982","wikidata":"https://www.wikidata.org/wiki/Q5227287","display_name":"Data extraction","level":3,"score":0.26499998569488525},{"id":"https://openalex.org/C2777220311","wikidata":"https://www.wikidata.org/wiki/Q6423340","display_name":"Knowledge acquisition","level":2,"score":0.26100000739097595},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.2554999887943268},{"id":"https://openalex.org/C58328972","wikidata":"https://www.wikidata.org/wiki/Q184609","display_name":"Expert system","level":2,"score":0.25060001015663147}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3390/make7040152","is_oa":true,"landing_page_url":"https://doi.org/10.3390/make7040152","pdf_url":"https://www.mdpi.com/2504-4990/7/4/152/pdf?version=1764080429","source":{"id":"https://openalex.org/S4210213891","display_name":"Machine Learning and Knowledge Extraction","issn_l":"2504-4990","issn":["2504-4990"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning and Knowledge Extraction","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:7172af14006e4d7c955653d425c87f8e","is_oa":true,"landing_page_url":"https://doaj.org/article/7172af14006e4d7c955653d425c87f8e","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Machine Learning and Knowledge Extraction, Vol 7, Iss 4, p 152 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/make7040152","is_oa":true,"landing_page_url":"https://doi.org/10.3390/make7040152","pdf_url":"https://www.mdpi.com/2504-4990/7/4/152/pdf?version=1764080429","source":{"id":"https://openalex.org/S4210213891","display_name":"Machine Learning and Knowledge Extraction","issn_l":"2504-4990","issn":["2504-4990"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning and Knowledge Extraction","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5639438761","display_name":null,"funder_award_id":"075-15-2024-534","funder_id":"https://openalex.org/F4320327494","funder_display_name":"Ministry of Science and Higher Education of the Russian Federation"},{"id":"https://openalex.org/G6444257319","display_name":null,"funder_award_id":"075-15-","funder_id":"https://openalex.org/F4320327494","funder_display_name":"Ministry of Science and Higher Education of the Russian Federation"}],"funders":[{"id":"https://openalex.org/F4320327494","display_name":"Ministry of Science and Higher Education of the Russian Federation","ror":null}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416772091.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0,149],"rapid":[1],"growth":[2],"of":[3,6,97,142],"the":[4,140],"body":[5],"literature":[7],"on":[8],"heavy":[9],"metal":[10,70],"hyperaccumulation":[11],"in":[12,19,105,145],"plants":[13],"has":[14],"created":[15],"a":[16,52,87,111,119,166],"critical":[17],"bottleneck":[18],"data":[20,60],"synthesis.":[21],"Manual":[22],"curation":[23],"is":[24],"slow,":[25],"labor-intensive,":[26],"and":[27,49,61,73,164],"not":[28,135],"scalable.":[29],"To":[30],"address":[31],"this":[32],"issue,":[33],"we":[34],"developed":[35],"an":[36],"artificial":[37],"intelligence":[38],"pipeline":[39,84],"that":[40,93,130],"automatically":[41],"transforms":[42],"unstructured":[43],"scientific":[44,146],"papers,":[45],"including":[46],"text,":[47],"tables,":[48],"figures,":[50],"into":[51],"structured":[53],"knowledge":[54,147],"database.":[55],"Our":[56,100],"system":[57],"recovers":[58],"numerical":[59],"extracts":[62],"key":[63],"experimental":[64],"parameters,":[65],"such":[66],"as":[67],"plant":[68],"species,":[69],"types,":[71],"concentrations,":[72],"growing":[74],"conditions.":[75],"This":[76,128],"enables":[77],"on-demand":[78],"dataset":[79,92],"generation.":[80],"We":[81,109],"validated":[82],"our":[83],"by":[85],"replicating":[86],"recently":[88],"published,":[89],"manually":[90],"curated":[91],"required":[94],"seven":[95],"months":[96],"expert":[98],"effort.":[99],"tool":[101],"achieved":[102],"comparable":[103],"accuracy":[104],"minutes":[106],"per":[107],"article.":[108],"implemented":[110],"dual-validation":[112],"strategy":[113],"combining":[114],"standard":[115],"extraction":[116,132],"metrics":[117],"with":[118],"qualitative":[120],"\u201cLLM-as-a-Judge\u201d":[121],"fact-checking":[122],"layer":[123],"to":[124],"assess":[125],"contextual":[126],"correctness.":[127],"revealed":[129],"high":[131],"performance":[133],"does":[134],"guarantee":[136],"factual":[137],"reliability,":[138],"underscoring":[139],"necessity":[141],"semantic":[143],"validation":[144],"extraction.":[148],"resulting":[150],"open,":[151],"reproducible":[152],"framework":[153],"accelerates":[154],"evidence":[155],"synthesis,":[156],"supports":[157],"trend":[158],"analysis":[159],"(e.g.,":[160],"metal\u2013plant":[161],"co-occurrence":[162],"networks),":[163],"provides":[165],"scalable":[167],"solution":[168],"for":[169],"data-driven":[170],"environmental":[171],"research.":[172]},"counts_by_year":[],"updated_date":"2026-03-22T08:09:32.410652","created_date":"2025-11-25T00:00:00"}
