{"id":"https://openalex.org/W7078281286","doi":"https://doi.org/10.48550/arxiv.2508.19428","title":"Heterogeneous LLM Methods for Ontology Learning (Few-Shot Prompting, Ensemble Typing, and Attention-Based Taxonomies)","display_name":"Heterogeneous LLM Methods for Ontology Learning (Few-Shot Prompting, Ensemble Typing, and Attention-Based Taxonomies)","publication_year":2025,"publication_date":"2025-08-26","ids":{"openalex":"https://openalex.org/W7078281286","doi":"https://doi.org/10.48550/arxiv.2508.19428"},"language":"en","primary_location":{"id":"doi:10.48550/arxiv.2508.19428","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2508.19428","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2508.19428","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Beliaeva, Aleksandra","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Beliaeva, Aleksandra","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Rahmatullaev, Temurbek","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rahmatullaev, Temurbek","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T12157","display_name":"Geochemistry and Geologic Mapping","score":0.6693999767303467,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12157","display_name":"Geochemistry and Geologic Mapping","score":0.6693999767303467,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13067","display_name":"Geological Modeling and Analysis","score":0.02459999918937683,"subfield":{"id":"https://openalex.org/subfields/1906","display_name":"Geochemistry and Petrology"},"field":{"id":"https://openalex.org/fields/19","display_name":"Earth and Planetary Sciences"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14311","display_name":"Electrical and Electromagnetic Research","score":0.020400000736117363,"subfield":{"id":"https://openalex.org/subfields/3107","display_name":"Atomic and Molecular Physics, and Optics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5558000206947327},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.515500009059906},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.44510000944137573},{"id":"https://openalex.org/keywords/cosine-similarity","display_name":"Cosine similarity","score":0.4277999997138977},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.4268999993801117},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.42320001125335693},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.41359999775886536},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.34689998626708984},{"id":"https://openalex.org/keywords/taxonomy","display_name":"Taxonomy (biology)","score":0.33889999985694885}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8040000200271606},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5623999834060669},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5558000206947327},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.515500009059906},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4717000126838684},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.44510000944137573},{"id":"https://openalex.org/C2780762811","wikidata":"https://www.wikidata.org/wiki/Q1784941","display_name":"Cosine similarity","level":3,"score":0.4277999997138977},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.4268999993801117},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.42320001125335693},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.41359999775886536},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.34689998626708984},{"id":"https://openalex.org/C58642233","wikidata":"https://www.wikidata.org/wiki/Q8269924","display_name":"Taxonomy (biology)","level":2,"score":0.33889999985694885},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.32499998807907104},{"id":"https://openalex.org/C25810664","wikidata":"https://www.wikidata.org/wiki/Q44325","display_name":"Ontology","level":2,"score":0.323199987411499},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.3131999969482422},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.31150001287460327},{"id":"https://openalex.org/C2129575","wikidata":"https://www.wikidata.org/wiki/Q54837","display_name":"Semantic Web","level":2,"score":0.30309998989105225},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.30070000886917114},{"id":"https://openalex.org/C55166926","wikidata":"https://www.wikidata.org/wiki/Q2892946","display_name":"Oracle","level":2,"score":0.29330000281333923},{"id":"https://openalex.org/C2777462759","wikidata":"https://www.wikidata.org/wiki/Q18395344","display_name":"Word embedding","level":3,"score":0.28220000863075256},{"id":"https://openalex.org/C138958017","wikidata":"https://www.wikidata.org/wiki/Q190087","display_name":"Data type","level":2,"score":0.2802000045776367},{"id":"https://openalex.org/C74197172","wikidata":"https://www.wikidata.org/wiki/Q1195339","display_name":"Directed acyclic graph","level":2,"score":0.2784999907016754},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2784000039100647},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.27799999713897705},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.26919999718666077},{"id":"https://openalex.org/C72634772","wikidata":"https://www.wikidata.org/wiki/Q386824","display_name":"Data integration","level":2,"score":0.2676999866962433},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.2669999897480011},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.26489999890327454},{"id":"https://openalex.org/C28006648","wikidata":"https://www.wikidata.org/wiki/Q6934509","display_name":"Multi-task learning","level":3,"score":0.2614000141620636},{"id":"https://openalex.org/C69075417","wikidata":"https://www.wikidata.org/wiki/Q515701","display_name":"Linked data","level":3,"score":0.2508000135421753}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2508.19428","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2508.19428","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2508.19428","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2508.19428","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0],"present":[1],"a":[2,65,75,116,147,179,189],"comprehensive":[3],"system":[4],"for":[5,224],"addressing":[6],"Tasks":[7],"A,":[8,54],"B,":[9,105],"and":[10,28,38,60,79,97,219],"C":[11],"of":[12,48,174,221],"the":[13,20,46,49,120,131,138,204,216],"LLMs4OL":[14],"2025":[15],"challenge,":[16],"which":[17,106],"together":[18,212],"span":[19],"full":[21],"ontology":[22,225],"construction":[23],"pipeline:":[24],"term":[25],"extraction,":[26],"typing,":[27],"taxonomy":[29,167],"discovery.":[30],"Our":[31],"approach":[32],"combines":[33,151],"retrieval-augmented":[34,66],"prompting,":[35],"zero-shot":[36,139,148],"classification,":[37],"attention-based":[39],"graph":[40,170],"modeling":[41],"--":[42],"each":[43],"tailored":[44],"to":[45,77,110,183,199],"demands":[47],"respective":[50],"task.":[51],"For":[52],"Task":[53,104,163],"we":[55,129,145,165,177],"jointly":[56],"extract":[57],"domain-specific":[58],"terms":[59,78],"their":[61],"ontological":[62],"types":[63,80,109],"using":[64,159],"generation":[67],"(RAG)":[68],"pipeline.":[69],"Training":[70],"data":[71],"was":[72],"reformulated":[73],"into":[74],"document":[76],"correspondence,":[81],"while":[82],"test-time":[83],"inference":[84],"leverages":[85],"semantically":[86],"similar":[87],"training":[88,127],"examples.":[89],"This":[90],"single-pass":[91],"method":[92],"requires":[93],"no":[94],"model":[95,166],"finetuning":[96],"improves":[98],"overall":[99],"performance":[100],"through":[101],"lexical":[102],"augmentation":[103],"involves":[107],"assigning":[108],"given":[111],"terms,":[112],"is":[113,231],"handled":[114],"via":[115],"dual":[117],"strategy.":[118],"In":[119,137,162],"few-shot":[121,135],"setting":[122,140],"(for":[123,141],"domains":[124],"with":[125,134],"labeled":[126],"data),":[128],"reuse":[130],"RAG":[132],"scheme":[133],"prompting.":[136],"previously":[142],"unseen":[143],"domains),":[144],"use":[146],"classifier":[149],"that":[150],"cosine":[152],"similarity":[153],"scores":[154],"from":[155],"multiple":[156],"embedding":[157],"models":[158],"confidence-based":[160],"weighting.":[161],"C,":[164],"discovery":[168],"as":[169],"inference.":[171],"Using":[172],"embeddings":[173],"type":[175],"labels,":[176],"train":[178],"lightweight":[180],"cross-attention":[181],"layer":[182],"predict":[184],"is-a":[185],"relations":[186],"by":[187],"approximating":[188],"soft":[190],"adjacency":[191],"matrix.":[192],"These":[193],"modular,":[194],"task-specific":[195],"solutions":[196],"enabled":[197],"us":[198],"achieve":[200],"top-ranking":[201],"results":[202],"in":[203],"official":[205],"leaderboard":[206],"across":[207,227],"all":[208],"three":[209],"tasks.":[210],"Taken":[211],"these":[213],"strategies":[214],"showcase":[215],"scalability,":[217],"adaptability,":[218],"robustness":[220],"LLM-based":[222],"architectures":[223],"learning":[226],"heterogeneous":[228],"domains.":[229],"Code":[230],"available":[232],"at:":[233],"https://github.com/BelyaevaAlex/LLMs4OL-Challenge-Alexbek":[234]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
