{"id":"https://openalex.org/W7155106843","doi":"https://doi.org/10.48550/arxiv.2604.16982","title":"A phenotype-driven and evidence-governed framework for knowledge graph enrichment and hypotheses discovery in population data","display_name":"A phenotype-driven and evidence-governed framework for knowledge graph enrichment and hypotheses discovery in population data","publication_year":2026,"publication_date":"2026-04-18","ids":{"openalex":"https://openalex.org/W7155106843","doi":"https://doi.org/10.48550/arxiv.2604.16982"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.16982","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.16982","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.16982","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5083962910","display_name":"Adela B\u00c2RA","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"B\u00e2ra, Adela","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5121145332","display_name":"Simona-Vasilica Oprea","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Oprea, Simona-Vasilica","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.8083999752998352,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.8083999752998352,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.036400001496076584,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11303","display_name":"Bayesian Modeling and Causal Inference","score":0.03200000151991844,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.5906000137329102},{"id":"https://openalex.org/keywords/population","display_name":"Population","score":0.5195000171661377},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.5149000287055969},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.47040000557899475},{"id":"https://openalex.org/keywords/knowledge-extraction","display_name":"Knowledge extraction","score":0.375},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.3691999912261963},{"id":"https://openalex.org/keywords/causal-structure","display_name":"Causal structure","score":0.358599990606308},{"id":"https://openalex.org/keywords/knowledge-graph","display_name":"Knowledge graph","score":0.3305000066757202}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6452999711036682},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.5906000137329102},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.542900025844574},{"id":"https://openalex.org/C2908647359","wikidata":"https://www.wikidata.org/wiki/Q2625603","display_name":"Population","level":2,"score":0.5195000171661377},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5164999961853027},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.5149000287055969},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.47040000557899475},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.41830000281333923},{"id":"https://openalex.org/C120567893","wikidata":"https://www.wikidata.org/wiki/Q1582085","display_name":"Knowledge extraction","level":2,"score":0.375},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.3691999912261963},{"id":"https://openalex.org/C163504300","wikidata":"https://www.wikidata.org/wiki/Q2364925","display_name":"Causal structure","level":2,"score":0.358599990606308},{"id":"https://openalex.org/C2987255567","wikidata":"https://www.wikidata.org/wiki/Q33002955","display_name":"Knowledge graph","level":2,"score":0.3305000066757202},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.32989999651908875},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.30550000071525574},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.2897000014781952},{"id":"https://openalex.org/C93959086","wikidata":"https://www.wikidata.org/wiki/Q6888345","display_name":"Model selection","level":2,"score":0.27129998803138733},{"id":"https://openalex.org/C161301231","wikidata":"https://www.wikidata.org/wiki/Q3478658","display_name":"Knowledge representation and reasoning","level":2,"score":0.26089999079704285},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.26080000400543213},{"id":"https://openalex.org/C11671645","wikidata":"https://www.wikidata.org/wiki/Q5054567","display_name":"Causal model","level":2,"score":0.2583000063896179},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.25679999589920044},{"id":"https://openalex.org/C146380142","wikidata":"https://www.wikidata.org/wiki/Q1137726","display_name":"Directed graph","level":2,"score":0.2533999979496002}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.16982","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.16982","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.16982","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.16982","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.8203262686729431,"display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Current":[0],"knowledge":[1,125],"graph":[2,43],"(KG)":[3],"construction":[4],"methods":[5],"are":[6,74,97],"confirmatory,":[7],"focusing":[8],"on":[9,128],"recovering":[10],"known":[11],"relationships":[12,72],"rather":[13],"than":[14],"identifying":[15],"novel":[16],"or":[17,123],"context-dependent":[18,142],"nodes.":[19],"This":[20],"paper":[21],"proposes":[22],"a":[23,66,90],"phenotype-driven":[24],"and":[25,36,54,62,80,106,119,145,154,160,173],"evidence-governed":[26],"framework":[27,70,136],"that":[28,73,116,133,149],"shifts":[29],"the":[30,83,111,134,163,166],"paradigm":[31],"toward":[32],"structured":[33],"hypothesis":[34,60],"discovery":[35],"controlled":[37],"KG":[38,85],"expansion.":[39],"The":[40,69],"approach":[41],"integrates":[42],"neural":[44],"networks":[45],"(GNNs)":[46],"for":[47,59],"phenotype":[48],"discovery,":[49,120],"causal":[50,143],"inference,":[51],"probabilistic":[52],"reasoning":[53],"large":[55],"language":[56],"models":[57],"(LLMs)":[58],"generation":[61],"claim":[63],"extraction":[64],"within":[65],"unified":[67],"pipeline.":[68],"prioritizes":[71],"both":[75,152],"structurally":[76],"supported":[77],"by":[78],"data":[79,153],"underexplored":[81],"in":[82,100,191],"literature.":[84],"expansion":[86],"is":[87],"formulated":[88],"as":[89],"multi-objective":[91],"optimization":[92],"problem,":[93],"where":[94],"candidate":[95],"claims":[96,115,148],"jointly":[98],"evaluated":[99],"terms":[101],"of":[102,113],"relevance,":[103],"structural":[104],"validation":[105,172],"novelty.":[107],"Pareto-optimal":[108],"selection":[109],"enables":[110],"identification":[112],"non-dominated":[114],"balance":[117],"confirmation":[118],"avoiding":[121],"trivial":[122],"redundant":[124],"inclusion.":[126],"Experiments":[127],"heterogeneous":[129],"population":[130],"datasets":[131],"demonstrate":[132],"proposed":[135],"produces":[137],"more":[138],"interpretable":[139],"phenotypes,":[140],"reveals":[141],"structures":[144],"generates":[146],"high-quality":[147],"align":[150],"with":[151],"scientific":[155],"evidence.":[156],"Compared":[157],"to":[158],"rule-based":[159],"LLM-only":[161],"baselines,":[162],"method":[164],"achieves":[165],"best":[167],"trade-off":[168],"across":[169],"plausibility,":[170],"novelty,":[171],"relevance.":[174],"In":[175],"retrieval-augmented":[176],"settings,":[177],"it":[178],"significantly":[179],"improves":[180],"performance":[181],"(Recall@5=0.98)":[182],"while":[183],"reducing":[184],"hallucination":[185],"rates":[186],"(0.05),":[187],"highlighting":[188],"its":[189],"effectiveness":[190],"grounding":[192],"LLM":[193],"outputs.":[194]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-22T00:00:00"}
