{"id":"https://openalex.org/W7160843417","doi":"https://doi.org/10.48550/arxiv.2605.06728","title":"OmicsLM: A Multimodal Large Language Model for Multi-Sample Omics Reasoning","display_name":"OmicsLM: A Multimodal Large Language Model for Multi-Sample Omics Reasoning","publication_year":2026,"publication_date":"2026-05-07","ids":{"openalex":"https://openalex.org/W7160843417","doi":"https://doi.org/10.48550/arxiv.2605.06728"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.06728","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.06728","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.06728","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5057763067","display_name":"Maciej Sypetkowski","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sypetkowski, Maciej","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135837928","display_name":"Joanna Krawczyk","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Krawczyk, Joanna","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135872876","display_name":"\u0141ukasz Smoli\u0144ski","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Smoli\u0144ski, \u0141ukasz","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135850914","display_name":"Remigiusz Kinas","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kinas, Remigiusz","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135899526","display_name":"Przemys\u0142aw Pietrzak","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pietrzak, Przemys\u0142aw","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083420446","display_name":"Tomasz Jetka","orcid":"https://orcid.org/0000-0001-7449-9818"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jetka, Tomasz","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5046898079","display_name":"Rafa\u0142 Powalski","orcid":"https://orcid.org/0000-0001-8509-5683"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Powalski, Rafa\u0142","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.4269999861717224,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.4269999861717224,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10887","display_name":"Bioinformatics and Genomic Networks","score":0.15919999778270721,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11289","display_name":"Single-cell and spatial transcriptomics","score":0.10769999772310257,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/biological-data","display_name":"Biological data","score":0.6370999813079834},{"id":"https://openalex.org/keywords/biological-database","display_name":"Biological database","score":0.6281999945640564},{"id":"https://openalex.org/keywords/expression","display_name":"Expression (computer science)","score":0.5953999757766724},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5063999891281128},{"id":"https://openalex.org/keywords/transcriptome","display_name":"Transcriptome","score":0.4481000006198883},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.44029998779296875},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4172999858856201}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6790000200271606},{"id":"https://openalex.org/C201797286","wikidata":"https://www.wikidata.org/wiki/Q4914986","display_name":"Biological data","level":2,"score":0.6370999813079834},{"id":"https://openalex.org/C20901353","wikidata":"https://www.wikidata.org/wiki/Q4117139","display_name":"Biological database","level":2,"score":0.6281999945640564},{"id":"https://openalex.org/C90559484","wikidata":"https://www.wikidata.org/wiki/Q778379","display_name":"Expression (computer science)","level":2,"score":0.5953999757766724},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5063999891281128},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4738999903202057},{"id":"https://openalex.org/C162317418","wikidata":"https://www.wikidata.org/wiki/Q252857","display_name":"Transcriptome","level":4,"score":0.4481000006198883},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.44029998779296875},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4172999858856201},{"id":"https://openalex.org/C28225019","wikidata":"https://www.wikidata.org/wiki/Q4915005","display_name":"Biological network","level":2,"score":0.3125999867916107},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.30889999866485596},{"id":"https://openalex.org/C138958017","wikidata":"https://www.wikidata.org/wiki/Q190087","display_name":"Data type","level":2,"score":0.3077999949455261},{"id":"https://openalex.org/C9927688","wikidata":"https://www.wikidata.org/wiki/Q4915012","display_name":"Biological pathway","level":4,"score":0.3068000078201294},{"id":"https://openalex.org/C152662350","wikidata":"https://www.wikidata.org/wiki/Q815297","display_name":"Systems biology","level":2,"score":0.3012000024318695},{"id":"https://openalex.org/C137866125","wikidata":"https://www.wikidata.org/wiki/Q4299308","display_name":"Modelling biological systems","level":3,"score":0.2971999943256378},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.29280000925064087},{"id":"https://openalex.org/C70721500","wikidata":"https://www.wikidata.org/wiki/Q177005","display_name":"Computational biology","level":1,"score":0.2865000069141388},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2757999897003174},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2678999900817871},{"id":"https://openalex.org/C55037315","wikidata":"https://www.wikidata.org/wiki/Q5421151","display_name":"Experimental data","level":2,"score":0.26080000400543213}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.06728","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.06728","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.06728","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.06728","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[{"score":0.6268010139465332,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Interpreting":[0],"transcriptomic":[1,56,110],"data":[2,113],"is":[3],"one":[4,90],"of":[5],"the":[6,64],"most":[7,15],"common":[8],"analytical":[9],"tasks":[10],"in":[11,29,89],"modern":[12],"biology.":[13],"Yet":[14],"current":[16],"models":[17,199,207],"either":[18,146],"consume":[19],"expression":[20,71,159,190,216],"profiles":[21,48,160,191],"without":[22,31],"producing":[23],"natural-language":[24,50,75],"biological":[25,51,83,121,140,151,173,213],"explanations,":[26],"or":[27,149],"reason":[28],"language":[30,117],"direct":[32],"access":[33],"to":[34,85,196],"quantitative":[35,46,70],"omics":[36,47,198],"measurements.":[37],"We":[38,93,184],"introduce":[39,167],"OmicsLM,":[40],"a":[41,59,169],"multimodal":[42],"LLM":[43,65],"that":[44,186],"connects":[45],"with":[49],"tasks.":[52],"OmicsLM":[53,95,187],"represents":[54],"each":[55],"profile":[57],"as":[58],"compact":[60],"continuous":[61,109],"representation":[62],"within":[63],"context.":[66,92],"This":[67,126],"interface":[68],"preserves":[69],"signal":[72],"while":[73,203],"allowing":[74],"instructions,":[76],"explicit":[77],"gene":[78],"mentions,":[79],"and":[80,119,123,138,193,208],"multiple":[81],"interleaved":[82],"samples":[84],"be":[86],"processed":[87],"together":[88],"model":[91],"train":[94],"on":[96,200,211],"more":[97],"than":[98],"5.5":[99],"million":[100],"instruction-following":[101],"examples":[102],"spanning":[103],"over":[104,157,215],"70":[105],"task":[106],"types,":[107],"combining":[108],"inputs,":[111],"experimental":[112],"rendered":[114],"through":[115],"diverse":[116],"templates,":[118],"free-text":[120],"knowledge":[122],"question-answering":[124],"data.":[125,217],"mixture":[127],"covers":[128],"cell":[129],"type":[130],"annotation,":[131],"perturbation":[132],"prediction,":[133,135],"clinical":[134],"pathway":[136],"reasoning,":[137],"open-ended":[139],"question":[141,174],"answering.":[142],"Existing":[143],"benchmarks":[144],"evaluate":[145],"profile-level":[147,201],"prediction":[148],"text-only":[150],"QA,":[152],"leaving":[153],"language-guided,":[154],"multi-sample":[155,172],"reasoning":[156,214],"real":[158,178],"unmeasured.":[161],"To":[162],"close":[163],"this":[164],"gap,":[165],"we":[166],"GEO-OmicsQA,":[168],"benchmark":[170],"for":[171],"answering":[175],"built":[176],"from":[177],"Gene":[179],"Expression":[180],"Omnibus":[181],"(GEO)":[182],"studies.":[183],"demonstrate":[185],"can":[188],"use":[189],"directly":[192],"perform":[194],"comparably":[195],"specialized":[197],"tasks,":[202],"outperforming":[204],"both":[205],"omics-specialized":[206],"general":[209],"LLMs":[210],"language-guided":[212]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-05-12T00:00:00"}
