{"id":"https://openalex.org/W7162769505","doi":"https://doi.org/10.48550/arxiv.2605.29184","title":"Influence-Guided Symbolic Regression: Scientific Discovery via LLM-Driven Equation Search with Granular Feedback","display_name":"Influence-Guided Symbolic Regression: Scientific Discovery via LLM-Driven Equation Search with Granular Feedback","publication_year":2026,"publication_date":"2026-05-27","ids":{"openalex":"https://openalex.org/W7162769505","doi":"https://doi.org/10.48550/arxiv.2605.29184"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.29184","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.29184","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.29184","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050007929","display_name":"Evgeny S. Saveliev","orcid":"https://orcid.org/0000-0003-2887-0342"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Saveliev, Evgeny S.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137340061","display_name":"Samuel Holt","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Holt, Samuel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067213867","display_name":"Nabeel Seedat","orcid":"https://orcid.org/0000-0002-2134-6639"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Seedat, Nabeel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137343841","display_name":"David L. Bentley","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bentley, David L.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060506150","display_name":"Jim Weatherall","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Weatherall, Jim","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5137330621","display_name":"Mihaela van der Schaar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"van der Schaar, Mihaela","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.2639999985694885,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.2639999985694885,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.09480000287294388,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12090","display_name":"Language and cultural evolution","score":0.06830000132322311,"subfield":{"id":"https://openalex.org/subfields/3316","display_name":"Cultural Studies"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.5077000260353088},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4480000138282776},{"id":"https://openalex.org/keywords/business-process-discovery","display_name":"Business process discovery","score":0.41519999504089355},{"id":"https://openalex.org/keywords/tree","display_name":"Tree (set theory)","score":0.4065999984741211},{"id":"https://openalex.org/keywords/scientific-discovery","display_name":"Scientific discovery","score":0.40540000796318054},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.3806000053882599},{"id":"https://openalex.org/keywords/scalar","display_name":"Scalar (mathematics)","score":0.37470000982284546},{"id":"https://openalex.org/keywords/search-algorithm","display_name":"Search algorithm","score":0.32510000467300415}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6086999773979187},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.5077000260353088},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4480000138282776},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4284000098705292},{"id":"https://openalex.org/C93453677","wikidata":"https://www.wikidata.org/wiki/Q1017580","display_name":"Business process discovery","level":5,"score":0.41519999504089355},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.4065999984741211},{"id":"https://openalex.org/C2984917352","wikidata":"https://www.wikidata.org/wiki/Q12772819","display_name":"Scientific discovery","level":2,"score":0.40540000796318054},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.3806000053882599},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3790999948978424},{"id":"https://openalex.org/C57691317","wikidata":"https://www.wikidata.org/wiki/Q1289248","display_name":"Scalar (mathematics)","level":2,"score":0.37470000982284546},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.36890000104904175},{"id":"https://openalex.org/C125583679","wikidata":"https://www.wikidata.org/wiki/Q755673","display_name":"Search algorithm","level":2,"score":0.32510000467300415},{"id":"https://openalex.org/C83546350","wikidata":"https://www.wikidata.org/wiki/Q1139051","display_name":"Regression","level":2,"score":0.3203999996185303},{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.3165999948978424},{"id":"https://openalex.org/C2777655017","wikidata":"https://www.wikidata.org/wiki/Q1501161","display_name":"Toolbox","level":2,"score":0.3142000138759613},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.31040000915527344},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.3100000023841858},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.290800005197525},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.28690001368522644},{"id":"https://openalex.org/C185429906","wikidata":"https://www.wikidata.org/wiki/Q1130160","display_name":"Estimator","level":2,"score":0.27720001339912415},{"id":"https://openalex.org/C19499675","wikidata":"https://www.wikidata.org/wiki/Q232207","display_name":"Monte Carlo method","level":2,"score":0.272599995136261},{"id":"https://openalex.org/C52421305","wikidata":"https://www.wikidata.org/wiki/Q1151499","display_name":"Particle filter","level":3,"score":0.25920000672340393},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2563999891281128},{"id":"https://openalex.org/C137002209","wikidata":"https://www.wikidata.org/wiki/Q898521","display_name":"Hidden variable theory","level":3,"score":0.25450000166893005},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.250900000333786}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.29184","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.29184","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.29184","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.29184","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"offer":[4],"a":[5,48,63,89,126,155,182,186,194,205],"promising":[6],"avenue":[7],"for":[8,88,178],"scientific":[9],"discovery,":[10],"yet":[11],"their":[12],"application":[13],"to":[14,43,108],"symbolic":[15],"regression":[16],"is":[17],"often":[18],"constrained":[19],"by":[20],"inefficient":[21],"search":[22,136],"strategies":[23],"and":[24,168,200],"coarse":[25],"feedback":[26],"signals.":[27],"Current":[28],"methods":[29],"typically":[30],"guide":[31],"LLMs":[32],"using":[33,96,185],"scalar":[34],"metrics":[35],"(e.g.,":[36],"global":[37],"Mean":[38],"Squared":[39],"Error),":[40],"which":[41,45,92,191],"fail":[42],"identify":[44],"components":[46],"of":[47,141,147,158],"proposed":[49],"equation":[50,67],"are":[51,93],"driving":[52],"performance":[53],"or":[54],"causing":[55],"error.":[56],"We":[57,150],"introduce":[58],"\\textit{Influence-Guided":[59],"Symbolic":[60],"Regression}":[61],"(IGSR),":[62],"method":[64],"that":[65,116,207],"frames":[66],"discovery":[68,180],"as":[69],"an":[70,81,112,165],"iterative":[71],"two-step":[72],"process":[73,115],"combining":[74],"diverse":[75,156],"term":[76],"generation":[77],"with":[78,145],"rigorous":[79],"selection:":[80],"LLM":[82],"generates":[83],"candidate":[84],"basis":[85],"functions":[86],"$\u03c8_j(\\mathbf{x})$":[87],"linear":[90],"model,":[91],"then":[94],"evaluated":[95],"granular":[97],"influence":[98],"scores":[99,102],"$\u0394_j$.":[100],"These":[101],"quantify":[103],"each":[104],"term's":[105],"marginal":[106],"contribution":[107],"generalization":[109],"accuracy,":[110],"enabling":[111],"influence-guided":[113],"pruning":[114],"systematically":[117],"refines":[118],"the":[119,134,175],"model":[120],"structure.":[121],"Integrating":[122],"this":[123],"mechanism":[124],"into":[125],"Monte":[127],"Carlo":[128],"Tree":[129],"Search":[130],"(MCTS)":[131],"enables":[132],"navigating":[133],"combinatorial":[135],"space":[137],"while":[138],"balancing":[139],"exploration":[140],"novel":[142,195],"functional":[143],"forms":[144],"exploitation":[146],"high-influence":[148],"components.":[149],"demonstrate":[151],"IGSR's":[152],"effectiveness":[153],"on":[154],"suite":[157],"benchmarks,":[159],"including":[160],"LLM-SRBench,":[161],"pharmacological":[162],"PKPD":[163],"models,":[164],"epidemiological":[166],"simulation,":[167],"real-world":[169],"genomic":[170],"data.":[171],"Notably,":[172],"we":[173],"validate":[174],"framework's":[176],"capacity":[177],"genuine":[179],"in":[181,190],"case":[183],"study":[184],"high-dimensional":[187],"biological":[188],"dataset,":[189],"IGSR":[192],"identified":[193],"relationship":[196],"between":[197],"DNA":[198],"methylation":[199],"RNA":[201],"Polymerase":[202],"II":[203],"pausing;":[204],"hypothesis":[206],"was":[208],"subsequently":[209],"supported":[210],"via":[211],"wet-lab":[212],"experimentation.":[213]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-30T00:00:00"}
