{"id":"https://openalex.org/W7130356799","doi":"https://doi.org/10.48550/arxiv.2602.15791","title":"Enhancing Building Semantics Preservation in AI Model Training with Large Language Model Encodings","display_name":"Enhancing Building Semantics Preservation in AI Model Training with Large Language Model Encodings","publication_year":2026,"publication_date":"2026-02-17","ids":{"openalex":"https://openalex.org/W7130356799","doi":"https://doi.org/10.48550/arxiv.2602.15791"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.15791","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.15791","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.15791","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5126284701","display_name":"Suhyung Jang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Jang, Suhyung","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126288154","display_name":"Ghang Lee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Ghang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078833903","display_name":"Jaekun Lee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Jaekun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5126350185","display_name":"Hyunjun Lee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, Hyunjun","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5126284701"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11006","display_name":"BIM and Construction Integration","score":0.5834000110626221,"subfield":{"id":"https://openalex.org/subfields/2215","display_name":"Building and Construction"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11006","display_name":"BIM and Construction Integration","score":0.5834000110626221,"subfield":{"id":"https://openalex.org/subfields/2215","display_name":"Building and Construction"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.07490000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.06430000066757202,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.7585999965667725},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.6452999711036682},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.6355999708175659},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.6140000224113464},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.5619000196456909},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.4546999931335449},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.45080000162124634},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.42829999327659607}],"concepts":[{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.7585999965667725},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6876000165939331},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.6452999711036682},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.6355999708175659},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.6140000224113464},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5677000284194946},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.5619000196456909},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.4546999931335449},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.45080000162124634},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.42829999327659607},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.4065000116825104},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3978999853134155},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.38109999895095825},{"id":"https://openalex.org/C70518039","wikidata":"https://www.wikidata.org/wiki/Q16000077","display_name":"Dimensionality reduction","level":2,"score":0.3560999929904938},{"id":"https://openalex.org/C111030470","wikidata":"https://www.wikidata.org/wiki/Q1430460","display_name":"Curse of dimensionality","level":2,"score":0.3549000024795532},{"id":"https://openalex.org/C2779542340","wikidata":"https://www.wikidata.org/wiki/Q1062461","display_name":"Learning object","level":2,"score":0.3215000033378601},{"id":"https://openalex.org/C2778828372","wikidata":"https://www.wikidata.org/wiki/Q5283209","display_name":"Distributional semantics","level":3,"score":0.2969000041484833},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.2849999964237213},{"id":"https://openalex.org/C2777462759","wikidata":"https://www.wikidata.org/wiki/Q18395344","display_name":"Word embedding","level":3,"score":0.28290000557899475},{"id":"https://openalex.org/C189474733","wikidata":"https://www.wikidata.org/wiki/Q917912","display_name":"Model building","level":2,"score":0.2815999984741211},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.2766000032424927},{"id":"https://openalex.org/C161301231","wikidata":"https://www.wikidata.org/wiki/Q3478658","display_name":"Knowledge representation and reasoning","level":2,"score":0.2694000005722046}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.15791","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.15791","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.15791","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.15791","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.5548407435417175}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Accurate":[0],"representation":[1,127],"of":[2,150,163,180],"building":[3,80,94,101,175],"semantics,":[4],"encompassing":[5],"both":[6],"generic":[7],"object":[8,95],"types":[9],"and":[10,25,70,119,182],"specific":[11],"subtypes,":[12,44],"is":[13],"essential":[14],"for":[15,155,194],"effective":[16],"AI":[17],"model":[18,64],"training":[19,58,88],"in":[20,79,197],"the":[21,38,84,125,136,141,161,178,202],"architecture,":[22],"engineering,":[23],"construction,":[24],"operation":[26],"(AECO)":[27],"industry.":[28,204],"Conventional":[29],"encoding":[30],"methods":[31],"(e.g.,":[32,67],"one-hot)":[33],"often":[34],"fail":[35],"to":[36,75,91,153,167,171,187],"convey":[37],"nuanced":[39],"relationships":[40],"among":[41],"closely":[42],"related":[43],"limiting":[45],"AI's":[46,169],"semantic":[47,198],"comprehension.":[48],"To":[49],"address":[50],"this":[51,53,189],"limitation,":[52],"study":[54],"proposes":[55],"a":[56,146],"novel":[57],"approach":[59,190],"that":[60,132],"employs":[61],"large":[62],"language":[63],"(LLM)":[65],"embeddings":[66,114,122],"OpenAI":[68],"GPT":[69],"Meta":[71],"LLaMA)":[72],"as":[73],"encodings":[74,134,166],"preserve":[76],"finer":[77],"distinctions":[78],"semantics.":[81,176],"We":[82],"evaluated":[83],"proposed":[85],"method":[86],"by":[87],"GraphSAGE":[89],"models":[90,103],"classify":[92],"42":[93],"subtypes":[96],"across":[97],"five":[98],"high-rise":[99],"residential":[100],"information":[102],"(BIMs).":[104],"Various":[105],"embedding":[106,144],"dimensions":[107],"were":[108],"tested,":[109],"including":[110],"original":[111],"high-dimensional":[112],"LLM":[113,133],"(1,536,":[115],"3,072,":[116],"or":[117],"4,096)":[118],"1,024-dimensional":[120],"compacted":[121],"generated":[123],"via":[124],"Matryoshka":[126],"model.":[128],"Experimental":[129],"results":[130,159],"demonstrated":[131],"outperformed":[135],"conventional":[137],"one-hot":[138,156],"baseline,":[139],"with":[140],"llama-3":[142],"(compacted)":[143],"achieving":[145],"weighted":[147],"average":[148],"F1-score":[149],"0.8766,":[151],"compared":[152],"0.8475":[154],"encoding.":[157],"The":[158],"underscore":[160],"promise":[162],"leveraging":[164],"LLM-based":[165],"enhance":[168],"ability":[170],"interpret":[172],"complex,":[173],"domain-specific":[174],"As":[177],"capabilities":[179],"LLMs":[181],"dimensionality":[183],"reduction":[184],"techniques":[185],"continue":[186],"evolve,":[188],"holds":[191],"considerable":[192],"potential":[193],"broad":[195],"application":[196],"elaboration":[199],"tasks":[200],"throughout":[201],"AECO":[203]},"counts_by_year":[],"updated_date":"2026-02-19T06:31:58.851227","created_date":"2026-02-19T00:00:00"}
