{"id":"https://openalex.org/W7162758722","doi":"https://doi.org/10.5281/zenodo.20451197","title":"Towards effective extraction of references from scientific literature with Large Language Model","display_name":"Towards effective extraction of references from scientific literature with Large Language Model","publication_year":2025,"publication_date":"2025-12-19","ids":{"openalex":"https://openalex.org/W7162758722","doi":"https://doi.org/10.5281/zenodo.20451197"},"language":"en","primary_location":{"id":"doi:10.5281/zenodo.20451197","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.20451197","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":""},"type":"article","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.5281/zenodo.20451197","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5105729908","display_name":"Swacha Jakub","orcid":null},"institutions":[{"id":"https://openalex.org/I104588304","display_name":"University of Szczecin","ror":"https://ror.org/05vmz5070","country_code":"PL","type":"education","lineage":["https://openalex.org/I104588304"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Jakub, Swacha","raw_affiliation_strings":["University of Szczecin, Szczecin, Poland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Szczecin, Szczecin, Poland","institution_ids":["https://openalex.org/I104588304"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137339615","display_name":"Maskeliuna Rytis","orcid":null},"institutions":[{"id":"https://openalex.org/I172574986","display_name":"Kaunas University of Technology","ror":"https://ror.org/01me6gb93","country_code":"LT","type":"education","lineage":["https://openalex.org/I172574986"]}],"countries":["LT"],"is_corresponding":false,"raw_author_name":"Rytis, Maskeliuna","raw_affiliation_strings":["Kaunas University of Technology, Kaunas, Lithuania"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Kaunas University of Technology, Kaunas, Lithuania","institution_ids":["https://openalex.org/I172574986"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5137383809","display_name":"Bla\u017eauskas Tomas","orcid":null},"institutions":[{"id":"https://openalex.org/I172574986","display_name":"Kaunas University of Technology","ror":"https://ror.org/01me6gb93","country_code":"LT","type":"education","lineage":["https://openalex.org/I172574986"]}],"countries":["LT"],"is_corresponding":false,"raw_author_name":"Tomas, Bla\u017eauskas","raw_affiliation_strings":["Kaunas University of Technology, Kaunas, Lithuania"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Kaunas University of Technology, Kaunas, Lithuania","institution_ids":["https://openalex.org/I172574986"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.62250153,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.305400013923645,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.305400013923645,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.21549999713897705,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.15839999914169312,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/automatic-summarization","display_name":"Automatic summarization","score":0.6718999743461609},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6654999852180481},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.6223999857902527},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.501800000667572},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.4562999904155731},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.4560999870300293},{"id":"https://openalex.org/keywords/automation","display_name":"Automation","score":0.4537999927997589},{"id":"https://openalex.org/keywords/scientific-literature","display_name":"Scientific literature","score":0.4375},{"id":"https://openalex.org/keywords/data-extraction","display_name":"Data extraction","score":0.4196000099182129}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8127999901771545},{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.6718999743461609},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6654999852180481},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.6223999857902527},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5821999907493591},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.5196999907493591},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.501800000667572},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4997999966144562},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.4562999904155731},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.4560999870300293},{"id":"https://openalex.org/C115901376","wikidata":"https://www.wikidata.org/wiki/Q184199","display_name":"Automation","level":2,"score":0.4537999927997589},{"id":"https://openalex.org/C2781083858","wikidata":"https://www.wikidata.org/wiki/Q17327049","display_name":"Scientific literature","level":2,"score":0.4375},{"id":"https://openalex.org/C2777466982","wikidata":"https://www.wikidata.org/wiki/Q5227287","display_name":"Data extraction","level":3,"score":0.4196000099182129},{"id":"https://openalex.org/C17231256","wikidata":"https://www.wikidata.org/wiki/Q5156540","display_name":"Completeness (order theory)","level":2,"score":0.41290000081062317},{"id":"https://openalex.org/C2778805511","wikidata":"https://www.wikidata.org/wiki/Q1713","display_name":"Citation","level":2,"score":0.41100001335144043},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3637999892234802},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3578999936580658},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.34610000252723694},{"id":"https://openalex.org/C2779980791","wikidata":"https://www.wikidata.org/wiki/Q1789476","display_name":"Bibliographic database","level":2,"score":0.321399986743927},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.3147999942302704},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.3124000132083893},{"id":"https://openalex.org/C63882131","wikidata":"https://www.wikidata.org/wiki/Q17122954","display_name":"Strengths and weaknesses","level":2,"score":0.3059000074863434},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.30570000410079956},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.3027999997138977},{"id":"https://openalex.org/C2776187449","wikidata":"https://www.wikidata.org/wiki/Q1513879","display_name":"Natural language generation","level":3,"score":0.29319998621940613},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.29019999504089355},{"id":"https://openalex.org/C171686336","wikidata":"https://www.wikidata.org/wiki/Q3532085","display_name":"Topic model","level":2,"score":0.289000004529953},{"id":"https://openalex.org/C189708586","wikidata":"https://www.wikidata.org/wiki/Q1504425","display_name":"Systematic review","level":3,"score":0.2773999869823456},{"id":"https://openalex.org/C178315738","wikidata":"https://www.wikidata.org/wiki/Q603441","display_name":"Bibliometrics","level":2,"score":0.2678000032901764},{"id":"https://openalex.org/C105345328","wikidata":"https://www.wikidata.org/wiki/Q206276","display_name":"Citation analysis","level":3,"score":0.2597000002861023},{"id":"https://openalex.org/C2777946921","wikidata":"https://www.wikidata.org/wiki/Q7449044","display_name":"Semantic analysis (machine learning)","level":2,"score":0.2556999921798706},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.2533000111579895}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.5281/zenodo.20451197","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.20451197","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":""}],"best_oa_location":{"id":"doi:10.5281/zenodo.20451197","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.20451197","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":""},"sustainable_development_goals":[{"score":0.5490943789482117,"id":"https://metadata.un.org/sdg/8","display_name":"Decent work and economic growth"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"unprecedented":[1],"growth":[2],"of":[3,13,16,22,71,105,137,179],"scientific":[4],"literature":[5,8,17,172],"makes":[6],"comprehensive":[7],"analysis":[9,18],"increasingly":[10],"challenging.":[11],"One":[12],"important":[14],"stages":[15],"is":[19,42,48],"the":[20,69,100,168,176],"extraction":[21,158],"references,":[23],"necessary":[24],"for":[25,76,147],"various":[26],"purposes,":[27],"such":[28],"as":[29],"backward":[30],"snowballing":[31],"or":[32],"citation":[33],"network":[34],"analysis.":[35],"Its":[36],"manual":[37,177],"execution":[38],"in":[39,63,170],"large":[40],"scale":[41],"prohibitively":[43],"labor-intensive":[44],"and":[45,55,92,123],"its":[46],"automation":[47],"not":[49],"trivial":[50],"due":[51],"to":[52,90,167],"non-typical":[53],"references":[54,110],"minor":[56],"errors":[57],"that":[58,85,142],"are":[59,144],"very":[60],"often":[61],"encountered":[62],"bibliographic":[64,180],"data.":[65,98],"This":[66,164],"paper":[67,88],"investigates":[68],"use":[70],"Large":[72],"Language":[73],"Models":[74],"(LLMs)":[75],"this":[77,148],"purpose.":[78],"We":[79],"present":[80],"a":[81,103,128],"novel,":[82],"domain-agnostic":[83],"pipeline":[84,101],"processes":[86],"research":[87,114],"PDFs":[89],"extract":[91],"parse":[93],"reference":[94],"lists":[95],"into":[96],"structured":[97],"Evaluating":[99],"on":[102],"corpus":[104],"20":[106],"papers":[107],"containing":[108],"1,070":[109],"across":[111],"four":[112],"diverse":[113],"domains,":[115],"we":[116],"demonstrate":[117],"high":[118],"accuracy":[119],"(95%":[120],"success":[121],"rate)":[122],"robust":[124],"performance,":[125],"quantified":[126],"by":[127,154],"new":[129],"Bibliographic":[130],"Information":[131],"Completeness":[132],"(BIC)":[133],"metric":[134],"(average":[135],"score":[136],"0.79).":[138],"Our":[139],"results":[140],"confirm":[141],"LLMs":[143],"highly":[145],"effective":[146],"task,":[149],"with":[150],"performance":[151],"primarily":[152],"constrained":[153],"upstream":[155],"PDF":[156],"text":[157],"quality":[159],"rather":[160],"than":[161],"semantic":[162],"understanding.":[163],"work":[165],"contributes":[166],"progress":[169],"AI-supported":[171],"analysis,":[173],"significantly":[174],"reducing":[175],"burden":[178],"data":[181],"collection.":[182]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-30T00:00:00"}
