{"id":"https://openalex.org/W4414648336","doi":"https://doi.org/10.1109/cibcb66090.2025.11177124","title":"Knowledge-Enriched Cell-Type Annotation in Single-Cell Transcriptomics via LLM Embeddings","display_name":"Knowledge-Enriched Cell-Type Annotation in Single-Cell Transcriptomics via LLM Embeddings","publication_year":2025,"publication_date":"2025-08-20","ids":{"openalex":"https://openalex.org/W4414648336","doi":"https://doi.org/10.1109/cibcb66090.2025.11177124"},"language":"en","primary_location":{"id":"doi:10.1109/cibcb66090.2025.11177124","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cibcb66090.2025.11177124","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Conference on Computational Intelligence in Bioinformatics and Computational Biology (CIBCB)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5119775217","display_name":"Andrea Fabbricatore","orcid":null},"institutions":[{"id":"https://openalex.org/I71209653","display_name":"Bocconi University","ror":"https://ror.org/05crjpb27","country_code":"IT","type":"education","lineage":["https://openalex.org/I71209653"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Andrea Fabbricatore","raw_affiliation_strings":["Bocconi University,Department of Computing Sciences,Milan,Italy"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Bocconi University,Department of Computing Sciences,Milan,Italy","institution_ids":["https://openalex.org/I71209653"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075865182","display_name":"Francesca M. Buffa","orcid":"https://orcid.org/0000-0003-0409-406X"},"institutions":[{"id":"https://openalex.org/I71209653","display_name":"Bocconi University","ror":"https://ror.org/05crjpb27","country_code":"IT","type":"education","lineage":["https://openalex.org/I71209653"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Francesca M. Buffa","raw_affiliation_strings":["Bocconi University,Bocconi Institute for Data Science and Analytics,Department of Computing Sciences,Milan,Italy"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Bocconi University,Bocconi Institute for Data Science and Analytics,Department of Computing Sciences,Milan,Italy","institution_ids":["https://openalex.org/I71209653"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056055676","display_name":"Andrea Tangherloni","orcid":"https://orcid.org/0000-0002-5856-4453"},"institutions":[{"id":"https://openalex.org/I71209653","display_name":"Bocconi University","ror":"https://ror.org/05crjpb27","country_code":"IT","type":"education","lineage":["https://openalex.org/I71209653"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Andrea Tangherloni","raw_affiliation_strings":["Bocconi University,Bocconi Institute for Data Science and Analytics,Department of Computing Sciences,Milan,Italy"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Bocconi University,Bocconi Institute for Data Science and Analytics,Department of Computing Sciences,Milan,Italy","institution_ids":["https://openalex.org/I71209653"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.20453097,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11289","display_name":"Single-cell and spatial transcriptomics","score":0.977400004863739,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11289","display_name":"Single-cell and spatial transcriptomics","score":0.977400004863739,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.7067999839782715},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5855000019073486},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5376999974250793},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.47429999709129333},{"id":"https://openalex.org/keywords/biological-data","display_name":"Biological data","score":0.4609000086784363},{"id":"https://openalex.org/keywords/biological-database","display_name":"Biological database","score":0.41359999775886536},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.4068000018596649},{"id":"https://openalex.org/keywords/gene-regulatory-network","display_name":"Gene regulatory network","score":0.3287000060081482}],"concepts":[{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.7067999839782715},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6477000117301941},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5855000019073486},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5425000190734863},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5376999974250793},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.47429999709129333},{"id":"https://openalex.org/C201797286","wikidata":"https://www.wikidata.org/wiki/Q4914986","display_name":"Biological data","level":2,"score":0.4609000086784363},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.4390000104904175},{"id":"https://openalex.org/C20901353","wikidata":"https://www.wikidata.org/wiki/Q4117139","display_name":"Biological database","level":2,"score":0.41359999775886536},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.4068000018596649},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3995000123977661},{"id":"https://openalex.org/C67339327","wikidata":"https://www.wikidata.org/wiki/Q1502576","display_name":"Gene regulatory network","level":4,"score":0.3287000060081482},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.32839998602867126},{"id":"https://openalex.org/C60908668","wikidata":"https://www.wikidata.org/wiki/Q690207","display_name":"Perceptron","level":3,"score":0.3181000053882599},{"id":"https://openalex.org/C28225019","wikidata":"https://www.wikidata.org/wiki/Q4915005","display_name":"Biological network","level":2,"score":0.3095000088214874},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.2989000082015991},{"id":"https://openalex.org/C89566754","wikidata":"https://www.wikidata.org/wiki/Q2273828","display_name":"Genome project","level":4,"score":0.2847000062465668},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.27869999408721924},{"id":"https://openalex.org/C2908923196","wikidata":"https://www.wikidata.org/wiki/Q5205742","display_name":"Gene Annotation","level":4,"score":0.2736999988555908},{"id":"https://openalex.org/C162317418","wikidata":"https://www.wikidata.org/wiki/Q252857","display_name":"Transcriptome","level":4,"score":0.2667999863624573},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2662999927997589},{"id":"https://openalex.org/C189206191","wikidata":"https://www.wikidata.org/wiki/Q222046","display_name":"Genomics","level":4,"score":0.26409998536109924},{"id":"https://openalex.org/C179717631","wikidata":"https://www.wikidata.org/wiki/Q2991667","display_name":"Multilayer perceptron","level":3,"score":0.2522999942302704},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.25130000710487366}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cibcb66090.2025.11177124","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cibcb66090.2025.11177124","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Conference on Computational Intelligence in Bioinformatics and Computational Biology (CIBCB)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W2146980885","https://openalex.org/W2177432730","https://openalex.org/W2523620612","https://openalex.org/W2605810679","https://openalex.org/W2622807556","https://openalex.org/W2774307122","https://openalex.org/W2792649309","https://openalex.org/W2806983506","https://openalex.org/W2895456557","https://openalex.org/W2900144939","https://openalex.org/W2901006884","https://openalex.org/W2907514116","https://openalex.org/W2907783748","https://openalex.org/W2911489562","https://openalex.org/W2949177718","https://openalex.org/W2964643116","https://openalex.org/W2970771982","https://openalex.org/W2982231537","https://openalex.org/W3004512212","https://openalex.org/W3024842005","https://openalex.org/W3046375318","https://openalex.org/W3114485724","https://openalex.org/W3118460357","https://openalex.org/W3127238141","https://openalex.org/W3190088085","https://openalex.org/W4221065117","https://openalex.org/W4285090748","https://openalex.org/W4293518950","https://openalex.org/W4293519310","https://openalex.org/W4297253404","https://openalex.org/W4316096017","https://openalex.org/W4365511667","https://openalex.org/W4386699707","https://openalex.org/W4387251311","https://openalex.org/W4387339852","https://openalex.org/W4388870182","https://openalex.org/W4392168151","https://openalex.org/W4401455022","https://openalex.org/W4404126723","https://openalex.org/W4412888751"],"related_works":[],"abstract_inverted_index":{"Single-cell":[0],"RNA":[1],"sequencing":[2],"(scRNA-seq)":[3],"has":[4],"profoundly":[5],"reshaped":[6],"our":[7,154],"understanding":[8],"of":[9,111,139],"cellular":[10],"diversity":[11],"and":[12,40,88,105,152,164,193],"functionality;":[13],"however,":[14],"accurate":[15],"cell-type":[16,79],"annotation":[17,24],"is":[18],"required":[19],"for":[20,74,173,184],"biological":[21,43,54,150,165,176],"interpretation.":[22],"Current":[23],"methods,":[25],"which":[26],"are":[27],"predominantly":[28],"reliant":[29],"on":[30],"gene":[31,57,67,189],"expression":[32],"alone":[33],"or":[34],"manual":[35],"curation,":[36],"suffer":[37],"from":[38,60,148],"subjectivity":[39],"a":[41,48,89,170],"limited":[42],"context.":[44],"Here,":[45],"we":[46,83],"introduce":[47],"novel":[49],"approach":[50,155],"that":[51,101,118,127],"integrates":[52],"textual":[53],"knowledge":[55,147],"via":[56],"embeddings,":[58],"derived":[59],"fine-tuning":[61],"Large":[62],"Language":[63],"Models":[64],"(LLMs),":[65],"with":[66],"counts":[68,107],"to":[69,93,160],"enrich":[70],"the":[71,96,109,137,157],"input":[72],"space":[73],"supervised":[75],"models":[76],"in":[77,114],"automatic":[78],"classification.":[80],"In":[81],"particular,":[82],"trained":[84],"an":[85],"XGBoost":[86],"model":[87],"multi-layer":[90],"perceptron":[91],"(MLP)":[92],"automatically":[94],"classify":[95],"cell":[97],"populations.":[98],"We":[99],"demonstrate":[100],"combining":[102],"Modern-BERT":[103],"embeddings":[104,131],"raw":[106],"enhances":[108,156],"performance":[110],"MLPs,":[112],"particularly":[113],"complex":[115],"classification":[116],"scenarios":[117],"involve":[119],"subtle":[120],"cell-subtype":[121],"distinctions.":[122],"Our":[123],"results":[124],"also":[125],"show":[126],"ModernBERT":[128],"generated":[129],"better":[130],"than":[132],"smaller":[133],"LLM":[134],"architectures,":[135],"underlining":[136],"value":[138],"enriched,":[140],"biologically":[141],"informed":[142],"embeddings.":[143],"By":[144],"embedding":[145],"prior":[146],"curated":[149],"databases":[151],"literature,":[153],"MLP\u2019s":[158],"ability":[159],"distinguish":[161],"sub-cell":[162],"populations":[163],"signals.":[166],"This":[167],"work":[168],"provides":[169],"scalable":[171],"framework":[172],"integrating":[174],"broader":[175],"context":[177],"into":[178],"scRNA-seq":[179],"analyses,":[180],"offering":[181],"new":[182],"opportunities":[183],"downstream":[185],"tasks":[186],"such":[187],"as":[188],"regulatory":[190],"network":[191],"inference":[192],"cross-species":[194],"annotation.":[195]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
