{"id":"https://openalex.org/W4387928778","doi":"https://doi.org/10.48550/arxiv.2310.14029","title":"LLM-Prop: Predicting Physical And Electronic Properties Of Crystalline Solids From Their Text Descriptions","display_name":"LLM-Prop: Predicting Physical And Electronic Properties Of Crystalline Solids From Their Text Descriptions","publication_year":2023,"publication_date":"2023-10-21","ids":{"openalex":"https://openalex.org/W4387928778","doi":"https://doi.org/10.48550/arxiv.2310.14029"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2310.14029","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2310.14029","pdf_url":"https://arxiv.org/pdf/2310.14029","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2310.14029","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5024826347","display_name":"Andre Niyongabo Rubungo","orcid":"https://orcid.org/0000-0003-3608-2039"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rubungo, Andre Niyongabo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080163141","display_name":"Craig B. Arnold","orcid":"https://orcid.org/0000-0002-0293-5411"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Arnold, Craig","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066075206","display_name":"Barry P. Rand","orcid":"https://orcid.org/0000-0003-4409-8751"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rand, Barry P.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5063448291","display_name":"Adji Bousso Dieng","orcid":"https://orcid.org/0000-0001-5687-3554"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dieng, Adji Bousso","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":29,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9571999907493591,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12613","display_name":"X-ray Diffraction in Crystallography","score":0.9451000094413757,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/crystal","display_name":"Crystal (programming language)","score":0.7044224739074707},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6662665605545044},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5841444730758667},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5402223467826843},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.49487608671188354},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.4477244019508362},{"id":"https://openalex.org/keywords/property","display_name":"Property (philosophy)","score":0.4236701726913452},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3659592866897583},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.33146098256111145},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.31519705057144165},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.1053709089756012},{"id":"https://openalex.org/keywords/cartography","display_name":"Cartography","score":0.09164318442344666}],"concepts":[{"id":"https://openalex.org/C2781285689","wikidata":"https://www.wikidata.org/wiki/Q21921428","display_name":"Crystal (programming language)","level":2,"score":0.7044224739074707},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6662665605545044},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5841444730758667},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5402223467826843},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.49487608671188354},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.4477244019508362},{"id":"https://openalex.org/C189950617","wikidata":"https://www.wikidata.org/wiki/Q937228","display_name":"Property (philosophy)","level":2,"score":0.4236701726913452},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3659592866897583},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.33146098256111145},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.31519705057144165},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.1053709089756012},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.09164318442344666},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2310.14029","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2310.14029","pdf_url":"https://arxiv.org/pdf/2310.14029","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2310.14029","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2310.14029","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2310.14029","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2310.14029","pdf_url":"https://arxiv.org/pdf/2310.14029","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.6399999856948853,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G2044452311","display_name":null,"funder_award_id":"2118201","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8188805235","display_name":null,"funder_award_id":"2118201","funder_id":"https://openalex.org/F4320337377","funder_display_name":"Office of Advanced Cyberinfrastructure"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320309292","display_name":"Princeton University","ror":"https://ror.org/00hx57361"},{"id":"https://openalex.org/F4320337377","display_name":"Office of Advanced Cyberinfrastructure","ror":"https://ror.org/04nh1dc89"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4387928778.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2378211422","https://openalex.org/W2745001401","https://openalex.org/W4321353415","https://openalex.org/W2130974462","https://openalex.org/W2028665553","https://openalex.org/W2086519370","https://openalex.org/W972276598","https://openalex.org/W2087343574","https://openalex.org/W4246352526","https://openalex.org/W2121910908"],"abstract_inverted_index":{"The":[0],"prediction":[1],"of":[2,70,77,101,119,131,196],"crystal":[3,11,18,23,45,51,54,102,143,211],"properties":[4,19,52,130],"plays":[5],"a":[6,44,47,92,111,174,177],"crucial":[7],"role":[8],"in":[9,149,154,166],"the":[10,36,60,71,75,115,126,139,157,193],"design":[12],"process.":[13],"Current":[14],"methods":[15],"for":[16,81,209],"predicting":[17,50,150,167],"focus":[20],"on":[21],"modeling":[22,35],"structures":[24,103],"using":[25],"graph":[26],"neural":[27],"networks":[28],"(GNNs).":[29],"Although":[30],"GNNs":[31,197],"are":[32],"powerful,":[33],"accurately":[34],"complex":[37],"interactions":[38],"between":[39],"atoms":[40],"and":[41,63,89,128,164,206],"molecules":[42],"within":[43],"remains":[46],"challenge.":[48],"Surprisingly,":[49],"from":[53,133],"text":[55,66,99,135],"descriptions":[56,100],"is":[57,74,160],"understudied,":[58],"despite":[59,182],"rich":[61],"information":[62,200],"expressiveness":[64],"that":[65,97,113],"data":[67,80],"offer.":[68],"One":[69],"main":[72],"reasons":[73],"lack":[76],"publicly":[78],"available":[79],"this":[82,85],"task.":[83],"In":[84],"paper,":[86],"we":[87],"develop":[88],"make":[90],"public":[91],"benchmark":[93],"dataset":[94],"(called":[95],"TextEdge)":[96],"contains":[98],"with":[104],"their":[105,134],"properties.":[106],"We":[107],"then":[108],"propose":[109],"LLM-Prop,":[110],"method":[112],"leverages":[114],"general-purpose":[116],"learning":[117],"capabilities":[118],"large":[120],"language":[121],"models":[122],"(LLMs)":[123],"to":[124,198,202],"predict":[125],"physical":[127],"electronic":[129],"crystals":[132],"descriptions.":[136],"LLM-Prop":[137,171],"outperforms":[138,173],"current":[140,194],"state-of-the-art":[141],"GNN-based":[142],"property":[144,212],"predictor":[145],"by":[146],"about":[147],"4%":[148],"band":[151,158],"gap,":[152],"3%":[153],"classifying":[155],"whether":[156],"gap":[159],"direct":[161],"or":[162],"indirect,":[163],"66%":[165],"unit":[168],"cell":[169],"volume.":[170],"also":[172],"finetuned":[175],"MatBERT,":[176],"domain-specific":[178],"pre-trained":[179],"BERT":[180],"model,":[181],"having":[183],"3":[184],"times":[185],"fewer":[186],"parameters.":[187],"Our":[188],"empirical":[189],"results":[190],"may":[191],"highlight":[192],"inability":[195],"capture":[199],"pertaining":[201],"space":[203],"group":[204],"symmetry":[205],"Wyckoff":[207],"sites":[208],"accurate":[210],"prediction.":[213]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":18}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
