{"id":"https://openalex.org/W4378174056","doi":"https://doi.org/10.48550/arxiv.2305.13338","title":"Gene Set Summarization using Large Language Models","display_name":"Gene Set Summarization using Large Language Models","publication_year":2023,"publication_date":"2023-05-21","ids":{"openalex":"https://openalex.org/W4378174056","doi":"https://doi.org/10.48550/arxiv.2305.13338"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2305.13338","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2305.13338","pdf_url":"https://arxiv.org/pdf/2305.13338","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2305.13338","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5062038813","display_name":"Marcin P. Joachimiak","orcid":"https://orcid.org/0000-0001-8175-045X"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Joachimiak, Marcin P.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035050896","display_name":"J. Harry Caufield","orcid":"https://orcid.org/0000-0001-5705-7831"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Caufield, J. Harry","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031007073","display_name":"Nomi L. Harris","orcid":"https://orcid.org/0000-0001-6315-3707"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Harris, Nomi L.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002223413","display_name":"Chris Mungall","orcid":"https://orcid.org/0000-0002-6601-2165"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Hyeongsik","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Mungall, Christopher J.","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mungall, Christopher J.","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5062038813"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10887","display_name":"Bioinformatics and Genomic Networks","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10885","display_name":"Gene expression and cancer classification","score":0.9836999773979187,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/automatic-summarization","display_name":"Automatic summarization","score":0.8042176961898804},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7361245155334473},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.6164281368255615},{"id":"https://openalex.org/keywords/ontology","display_name":"Ontology","score":0.6047698259353638},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5234196186065674},{"id":"https://openalex.org/keywords/term","display_name":"Term (time)","score":0.5228155851364136},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.46613118052482605},{"id":"https://openalex.org/keywords/gene-ontology","display_name":"Gene ontology","score":0.4620610475540161},{"id":"https://openalex.org/keywords/complement","display_name":"Complement (music)","score":0.4336961507797241},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4223254919052124},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.41421836614608765},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3432791829109192},{"id":"https://openalex.org/keywords/gene","display_name":"Gene","score":0.2385624647140503},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.17054226994514465},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.11788663268089294},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.10967662930488586}],"concepts":[{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.8042176961898804},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7361245155334473},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.6164281368255615},{"id":"https://openalex.org/C25810664","wikidata":"https://www.wikidata.org/wiki/Q44325","display_name":"Ontology","level":2,"score":0.6047698259353638},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5234196186065674},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.5228155851364136},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.46613118052482605},{"id":"https://openalex.org/C2987395477","wikidata":"https://www.wikidata.org/wiki/Q135085","display_name":"Gene ontology","level":4,"score":0.4620610475540161},{"id":"https://openalex.org/C112313634","wikidata":"https://www.wikidata.org/wiki/Q7886648","display_name":"Complement (music)","level":5,"score":0.4336961507797241},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4223254919052124},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.41421836614608765},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3432791829109192},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.2385624647140503},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.17054226994514465},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.11788663268089294},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.10967662930488586},{"id":"https://openalex.org/C188082640","wikidata":"https://www.wikidata.org/wiki/Q1780899","display_name":"Complementation","level":4,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C127716648","wikidata":"https://www.wikidata.org/wiki/Q104053","display_name":"Phenotype","level":3,"score":0.0},{"id":"https://openalex.org/C150194340","wikidata":"https://www.wikidata.org/wiki/Q26972","display_name":"Gene expression","level":3,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2305.13338","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2305.13338","pdf_url":"https://arxiv.org/pdf/2305.13338","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},{"id":"doi:10.48550/arxiv.2305.13338","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2305.13338","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2305.13338","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2305.13338","pdf_url":"https://arxiv.org/pdf/2305.13338","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"sustainable_development_goals":[{"score":0.75,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2366403280","https://openalex.org/W1495108544","https://openalex.org/W2091301346","https://openalex.org/W3148229873","https://openalex.org/W2150160875","https://openalex.org/W4242223894","https://openalex.org/W1517524280","https://openalex.org/W4323520239","https://openalex.org/W4389760904","https://openalex.org/W4306886878"],"abstract_inverted_index":{"Molecular":[0],"biologists":[1],"frequently":[2],"interpret":[3],"gene":[4,54,108,126,141,166],"lists":[5,55,164],"derived":[6,132],"from":[7,42,133,202],"high-throughput":[8],"experiments":[9],"and":[10,78,158,179,199,212,251],"computational":[11],"analysis.":[12,118],"This":[13,119],"is":[14],"typically":[15],"done":[16],"as":[17,48,60,112,243],"a":[18,43,61,82,100,113,244],"statistical":[19],"enrichment":[20,117,249],"analysis":[21,250],"that":[22,102,150,183,235,252],"measures":[23],"the":[24,49,66,196],"over-":[25],"or":[26,35,143,177],"under-representation":[27],"of":[28,68,90,94,125,255],"biological":[29],"function":[30,110],"terms":[31,182],"associated":[32],"with":[33,221],"genes":[34],"their":[36],"properties,":[37],"based":[38],"on":[39,81],"curated":[40,134],"assertions":[41,257],"knowledge":[44],"base":[45],"(KB)":[46],"such":[47],"Gene":[50],"Ontology":[51,98],"(GO).":[52],"Interpreting":[53],"can":[56,121],"also":[57],"be":[58],"framed":[59],"textual":[62],"summarization":[63,111],"task,":[64],"enabling":[65],"use":[67,122],"Large":[69],"Language":[70,92],"Models":[71],"(LLMs),":[72],"potentially":[73],"utilizing":[74],"scientific":[75],"texts":[76],"directly":[77],"avoiding":[79],"reliance":[80],"KB.":[83],"We":[84,148],"developed":[85],"SPINDOCTOR":[86],"(Structured":[87],"Prompt":[88],"Interpolation":[89],"Natural":[91],"Descriptions":[93],"Controlled":[95],"Terms":[96],"for":[97,165,246],"Reporting),":[99],"method":[101,120],"uses":[103],"GPT":[104],"models":[105],"to":[106,115,155,173,194,207,210],"perform":[107],"set":[109],"complement":[114],"standard":[116,203,247],"different":[123,229],"sources":[124],"functional":[127],"information:":[128],"(1)":[129],"structured":[130],"text":[131],"ontological":[135,256],"KB":[136],"annotations,":[137],"(2)":[138],"ontology-free":[139],"narrative":[140],"summaries,":[142],"(3)":[144],"direct":[145],"model":[146],"retrieval.":[147],"demonstrate":[149],"these":[151,189],"methods":[152,190,240],"are":[153,171,184,218,241],"able":[154,193],"generate":[156],"plausible":[157],"biologically":[159],"valid":[160],"summary":[161],"GO":[162],"term":[163,201,230,248],"sets.":[167],"However,":[168],"GPT-based":[169],"approaches":[170],"unable":[172],"deliver":[174],"reliable":[175],"scores":[176],"p-values":[178],"often":[180],"return":[181],"not":[185],"statistically":[186],"significant.":[187],"Crucially,":[188],"were":[191],"rarely":[192],"recapitulate":[195],"most":[197],"precise":[198],"informative":[200],"enrichment,":[204],"likely":[205],"due":[206],"an":[208,215],"inability":[209],"generalize":[211],"reason":[213],"using":[214],"ontology.":[216],"Results":[217],"highly":[219],"nondeterministic,":[220],"minor":[222],"variations":[223],"in":[224,227],"prompt":[225],"resulting":[226],"radically":[228],"lists.":[231],"Our":[232],"results":[233],"show":[234],"at":[236],"this":[237],"point,":[238],"LLM-based":[239],"unsuitable":[242],"replacement":[245],"manual":[253],"curation":[254],"remains":[258],"necessary.":[259]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":2}],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
