{"id":"https://openalex.org/W7161785974","doi":"https://doi.org/10.48550/arxiv.2605.19357","title":"SciCustom: A Framework for Custom Evaluation of Scientific Capabilities in Large Language Models","display_name":"SciCustom: A Framework for Custom Evaluation of Scientific Capabilities in Large Language Models","publication_year":2026,"publication_date":"2026-05-19","ids":{"openalex":"https://openalex.org/W7161785974","doi":"https://doi.org/10.48550/arxiv.2605.19357"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.19357","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.19357","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.19357","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5136597865","display_name":"Yiyang Gu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gu, Yiyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136605752","display_name":"Junwei Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Junwei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136515090","display_name":"Junyu Luo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Junyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136531945","display_name":"Ye Yuan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuan, Ye","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136507444","display_name":"Bin Feng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Feng, Bin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021772140","display_name":"Yingce Xia","orcid":"https://orcid.org/0000-0001-9823-9033"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xia, Yingce","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136585858","display_name":"Shufang Xie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xie, Shufang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101944547","display_name":"Kaili Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Kaili","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136574828","display_name":"Bohan Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Bohan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136546055","display_name":"Qi Shi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Qi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136604864","display_name":"Haoran Li","orcid":"https://orcid.org/0009-0006-6601-2401"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Haoran","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136608901","display_name":"Beier Xiao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiao, Beier","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136571690","display_name":"Zhiping Xiao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiao, Zhiping","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136549490","display_name":"Xiao Luo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Xiao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136524723","display_name":"Weizhi Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Weizhi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136579413","display_name":"Philip S. Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Philip S.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136510848","display_name":"Zequn Liu","orcid":"https://orcid.org/0009-0007-2521-4821"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Zequn","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136539272","display_name":"Ming Zhang","orcid":"https://orcid.org/0000-0003-2956-4056"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Ming","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":18,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.3346000015735626,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.3346000015735626,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.14159999787807465,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.1234000027179718,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.7508999705314636},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6697999835014343},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.621399998664856},{"id":"https://openalex.org/keywords/granularity","display_name":"Granularity","score":0.48840001225471497},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.4180999994277954},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.3774999976158142},{"id":"https://openalex.org/keywords/scheme","display_name":"Scheme (mathematics)","score":0.36629998683929443},{"id":"https://openalex.org/keywords/scope","display_name":"Scope (computer science)","score":0.31470000743865967}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7754999995231628},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.7508999705314636},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6697999835014343},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.621399998664856},{"id":"https://openalex.org/C177774035","wikidata":"https://www.wikidata.org/wiki/Q1246948","display_name":"Granularity","level":2,"score":0.48840001225471497},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.45660001039505005},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.4180999994277954},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.3774999976158142},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.36629998683929443},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3506999909877777},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.32100000977516174},{"id":"https://openalex.org/C2778012447","wikidata":"https://www.wikidata.org/wiki/Q1034415","display_name":"Scope (computer science)","level":2,"score":0.31470000743865967},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.29190000891685486},{"id":"https://openalex.org/C2781083858","wikidata":"https://www.wikidata.org/wiki/Q17327049","display_name":"Scientific literature","level":2,"score":0.29120001196861267},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2888000011444092},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.2879999876022339},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.2793999910354614},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.26969999074935913},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.26910001039505005},{"id":"https://openalex.org/C72634772","wikidata":"https://www.wikidata.org/wiki/Q386824","display_name":"Data integration","level":2,"score":0.262800008058548},{"id":"https://openalex.org/C195732255","wikidata":"https://www.wikidata.org/wiki/Q981008","display_name":"Sociology of scientific knowledge","level":2,"score":0.259799987077713},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.2597000002861023},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.25850000977516174},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.25450000166893005},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.2526000142097473}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.19357","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.19357","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.19357","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.19357","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2],"(LLMs)":[3],"are":[4,25,103],"increasingly":[5],"applied":[6],"to":[7,15,49,64,87],"scientific":[8,36,62,67,74,143,168],"research,":[9],"yet":[10],"existing":[11],"evaluations":[12],"often":[13],"fail":[14],"reflect":[16],"the":[17,51,55],"fine-grained":[18,139],"capabilities":[19,68,144,169],"required":[20],"in":[21,69,131,141,170],"practice.":[22],"Most":[23],"benchmarks":[24,59,147],"manually":[26],"curated":[27],"or":[28],"domain-generic,":[29],"limiting":[30],"scalability":[31],"and":[32,83,123,133,163],"alignment":[33],"with":[34,80],"real":[35],"use":[37],"cases.":[38],"In":[39],"this":[40,93],"paper,":[41],"we":[42],"propose":[43],"a":[44,85,97,161],"new":[45],"framework":[46],"named":[47],"SciCustom":[48,71,137],"address":[50],"problem.":[52],"It":[53],"enables":[54],"custom":[56,98],"construction":[57],"of":[58],"from":[60],"large-scale":[61,89],"data":[63,90],"evaluate":[65],"application-specific":[66],"LLMs.":[70,171],"first":[72],"organizes":[73],"knowledge":[75,78,94,101],"into":[76,92],"ontology-grounded":[77],"units":[79,102,110],"controlled":[81],"granularity":[82],"trains":[84],"tagger":[86],"map":[88],"instances":[91],"space.":[95],"Given":[96],"requirement,":[99],"relevant":[100],"identified":[104],"via":[105,115],"voting-based":[106],"multi-model":[107],"consensus.":[108],"These":[109],"enable":[111],"relevance-aware":[112],"benchmark":[113,125],"retrieval":[114],"binary":[116],"search,":[117],"followed":[118],"by":[119],"proxy":[120],"subset":[121],"selection":[122],"data-grounded":[124],"generation":[126],"for":[127,166],"efficient":[128],"evaluation.":[129],"Experiments":[130],"chemistry":[132],"healthcare":[134],"demonstrate":[135],"that":[136,145],"reveals":[138],"differences":[140],"LLM":[142],"standard":[146],"overlook,":[148],"while":[149],"requiring":[150],"neither":[151],"expert":[152],"annotation":[153],"nor":[154],"synthetic":[155],"question":[156],"generation.":[157],"This":[158],"work":[159],"provides":[160],"scalable":[162],"application-aware":[164],"foundation":[165],"benchmarking":[167],"The":[172],"source":[173],"code":[174],"is":[175],"available":[176],"at":[177],"https://github.com/yjwtheonly/SciCustom.":[178]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-21T00:00:00"}
