{"id":"https://openalex.org/W7130704241","doi":"https://doi.org/10.48550/arxiv.2602.16926","title":"BEMEval-Doc2Schema: Benchmarking Large Language Models for Structured Data Extraction in Building Energy Modeling","display_name":"BEMEval-Doc2Schema: Benchmarking Large Language Models for Structured Data Extraction in Building Energy Modeling","publication_year":2026,"publication_date":"2026-02-18","ids":{"openalex":"https://openalex.org/W7130704241","doi":"https://doi.org/10.48550/arxiv.2602.16926"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.16926","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5021947780","display_name":"Yiyuan Jia","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jia, Yiyuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126501966","display_name":"Xiaoqin Fu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fu, Xiaoqin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5126456736","display_name":"Liang Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Liang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11006","display_name":"BIM and Construction Integration","score":0.44749999046325684,"subfield":{"id":"https://openalex.org/subfields/2215","display_name":"Building and Construction"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11006","display_name":"BIM and Construction Integration","score":0.44749999046325684,"subfield":{"id":"https://openalex.org/subfields/2215","display_name":"Building and Construction"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10121","display_name":"Building Energy and Comfort Optimization","score":0.2687999904155731,"subfield":{"id":"https://openalex.org/subfields/2215","display_name":"Building and Construction"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11052","display_name":"Energy Load and Power Forecasting","score":0.028999999165534973,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.8327999711036682},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6647999882698059},{"id":"https://openalex.org/keywords/schema","display_name":"Schema (genetic algorithms)","score":0.6327000260353088},{"id":"https://openalex.org/keywords/nist","display_name":"NIST","score":0.5533000230789185},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.4986000061035156},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.45579999685287476},{"id":"https://openalex.org/keywords/model-building","display_name":"Model building","score":0.38190001249313354},{"id":"https://openalex.org/keywords/data-extraction","display_name":"Data extraction","score":0.35569998621940613}],"concepts":[{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.8327999711036682},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7035999894142151},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6647999882698059},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.6327000260353088},{"id":"https://openalex.org/C111219384","wikidata":"https://www.wikidata.org/wiki/Q6954384","display_name":"NIST","level":2,"score":0.5533000230789185},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.4986000061035156},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.45579999685287476},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.45249998569488525},{"id":"https://openalex.org/C189474733","wikidata":"https://www.wikidata.org/wiki/Q917912","display_name":"Model building","level":2,"score":0.38190001249313354},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3718000054359436},{"id":"https://openalex.org/C2777466982","wikidata":"https://www.wikidata.org/wiki/Q5227287","display_name":"Data extraction","level":3,"score":0.35569998621940613},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3472000062465668},{"id":"https://openalex.org/C133462117","wikidata":"https://www.wikidata.org/wiki/Q4929239","display_name":"Data collection","level":2,"score":0.34369999170303345},{"id":"https://openalex.org/C2780898871","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Performance metric","level":2,"score":0.33869999647140503},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.326200008392334},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.31060001254081726},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.2865000069141388},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.2856999933719635},{"id":"https://openalex.org/C55037315","wikidata":"https://www.wikidata.org/wiki/Q5421151","display_name":"Experimental data","level":2,"score":0.28380000591278076},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.28119999170303345},{"id":"https://openalex.org/C2780331096","wikidata":"https://www.wikidata.org/wiki/Q24965464","display_name":"Energy modeling","level":3,"score":0.2775000035762787},{"id":"https://openalex.org/C9357733","wikidata":"https://www.wikidata.org/wiki/Q6878417","display_name":"Missing data","level":2,"score":0.26809999346733093},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.25270000100135803},{"id":"https://openalex.org/C13736549","wikidata":"https://www.wikidata.org/wiki/Q4489420","display_name":"Industrial engineering","level":1,"score":0.25040000677108765}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.16926","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.16926","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.16926","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.16926","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"score":0.7124553918838501,"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advances":[1],"in":[2,57,182],"foundation":[3,48],"models,":[4],"including":[5],"large":[6],"language":[7],"models":[8,104],"(LLMs),":[9],"have":[10],"created":[11],"new":[12],"opportunities":[13],"to":[14,27,46],"automate":[15],"building":[16,67,184],"energy":[17,185],"modeling":[18,186],"(BEM).":[19],"However,":[20],"systematic":[21],"evaluation":[22],"has":[23],"remained":[24],"challenging":[25],"due":[26],"the":[28,78,87,147,175,189],"absence":[29],"of":[30],"publicly":[31],"available,":[32],"task-specific":[33],"datasets":[34],"and":[35,93,106,111,122,133,160,170],"standardized":[36],"performance":[37,50],"metrics.":[38],"We":[39],"present":[40],"BEMEval,":[41],"a":[42,69,83],"benchmark":[43,56,178],"framework":[44],"designed":[45],"assess":[47],"models'":[49],"across":[51,115],"BEM":[52,74,196],"tasks.":[53],"The":[54],"first":[55,176],"this":[58,98],"suite,":[59],"BEMEval-Doc2Schema,":[60],"focuses":[61],"on":[62,194],"structured":[63,91],"data":[64],"extraction":[65],"from":[66],"documentation,":[68],"foundational":[70],"step":[71],"toward":[72],"automated":[73],"processes.":[75],"BEMEval-Doc2Schema":[76,173],"introduces":[77],"Key-Value":[79],"Overlap":[80],"Rate":[81],"(KVOR),":[82],"metric":[84],"that":[85,127,134],"quantifies":[86],"alignment":[88],"between":[89],"LLM-generated":[90],"outputs":[92],"ground-truth":[94],"schema":[95,149],"references.":[96],"Using":[97],"framework,":[99],"we":[100],"evaluate":[101],"two":[102],"leading":[103],"(GPT-5":[105],"Gemini":[107,128],"2.5)":[108],"under":[109],"zero-shot":[110],"few-shot":[112,135],"prompting":[113],"strategies":[114],"three":[116],"datasets:":[117],"HERS":[118],"L100,":[119],"NREL":[120],"iUnit,":[121],"NIST":[123],"NZERTF.":[124],"Results":[125],"show":[126],"2.5":[129],"consistently":[130],"outperforms":[131],"GPT-5,":[132],"prompts":[136],"improve":[137],"accuracy":[138],"for":[139,179,191],"both":[140],"models.":[141],"Performance":[142],"also":[143],"varies":[144],"by":[145],"schema:":[146],"EPC":[148],"yields":[150],"significantly":[151],"higher":[152],"KVOR":[153],"scores":[154],"than":[155],"HPXML,":[156],"reflecting":[157],"its":[158],"simpler":[159],"reduced":[161],"hierarchical":[162],"depth.":[163],"By":[164],"combining":[165],"curated":[166],"datasets,":[167],"reproducible":[168],"metrics,":[169],"cross-model":[171],"comparisons,":[172],"establishes":[174],"community-driven":[177],"evaluating":[180],"LLMs":[181],"performing":[183],"tasks,":[187],"laying":[188],"groundwork":[190],"future":[192],"research":[193],"AI-assisted":[195],"workflows.":[197]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-02-21T00:00:00"}
