{"id":"https://openalex.org/W7160950371","doi":"https://doi.org/10.48550/arxiv.2605.09485","title":"SEMASIA: A Large-Scale Dataset of Semantically Structured Latent Representations","display_name":"SEMASIA: A Large-Scale Dataset of Semantically Structured Latent Representations","publication_year":2026,"publication_date":"2026-05-10","ids":{"openalex":"https://openalex.org/W7160950371","doi":"https://doi.org/10.48550/arxiv.2605.09485"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.09485","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.09485","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.09485","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5119536252","display_name":"Mario Edoardo Pandolfo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pandolfo, Mario Edoardo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004036090","display_name":"Enrico Grimaldi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Grimaldi, Enrico","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009176731","display_name":"Lorenzo Marinucci","orcid":"https://orcid.org/0000-0001-6345-7153"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Marinucci, Lorenzo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074239143","display_name":"Leonardo Di Nino","orcid":"https://orcid.org/0000-0002-0961-8320"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Di Nino, Leonardo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092964206","display_name":"Simone Fiorellino","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fiorellino, Simone","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084159640","display_name":"Sergio Barbarossa","orcid":"https://orcid.org/0000-0001-9846-8741"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Barbarossa, Sergio","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5135966856","display_name":"Paolo Di Lorenzo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Di Lorenzo, Paolo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.4496999979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.4496999979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.12770000100135803,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.12219999730587006,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/probabilistic-latent-semantic-analysis","display_name":"Probabilistic latent semantic analysis","score":0.6586999893188477},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5450999736785889},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5278000235557556},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.5130000114440918},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.4950000047683716},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4575999975204468},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.41909998655319214},{"id":"https://openalex.org/keywords/latent-semantic-analysis","display_name":"Latent semantic analysis","score":0.3774000108242035},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.37439998984336853}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7103999853134155},{"id":"https://openalex.org/C112933361","wikidata":"https://www.wikidata.org/wiki/Q2845258","display_name":"Probabilistic latent semantic analysis","level":2,"score":0.6586999893188477},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6101999878883362},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5450999736785889},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5278000235557556},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.5130000114440918},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.4950000047683716},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4575999975204468},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4496999979019165},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.41909998655319214},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4081999957561493},{"id":"https://openalex.org/C170133592","wikidata":"https://www.wikidata.org/wiki/Q1806883","display_name":"Latent semantic analysis","level":2,"score":0.3774000108242035},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.37439998984336853},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.36570000648498535},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.365200012922287},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.3562000095844269},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.34599998593330383},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.33980000019073486},{"id":"https://openalex.org/C171686336","wikidata":"https://www.wikidata.org/wiki/Q3532085","display_name":"Topic model","level":2,"score":0.3327000141143799},{"id":"https://openalex.org/C500882744","wikidata":"https://www.wikidata.org/wiki/Q269236","display_name":"Latent Dirichlet allocation","level":3,"score":0.3118000030517578},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.30959999561309814},{"id":"https://openalex.org/C207609745","wikidata":"https://www.wikidata.org/wiki/Q4944086","display_name":"Bootstrapping (finance)","level":2,"score":0.2955000102519989},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.29030001163482666},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.26759999990463257},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2669000029563904},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2635999917984009}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.09485","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.09485","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.09485","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.09485","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Latent":[0],"representations":[1,90],"learned":[2],"by":[3,15,69],"neural":[4],"networks":[5],"often":[6],"exhibit":[7],"semantic":[8,63,141],"structure,":[9],"where":[10],"concept":[11],"similarity":[12],"is":[13,52],"reflected":[14],"geometric":[16,183],"proximity":[17],"in":[18,30],"embedding":[19],"space.":[20],"However,":[21],"comparing":[22],"such":[23],"spaces":[24,155],"across":[25,98,143],"models":[26,97,144],"remains":[27,67],"difficult:":[28],"changes":[29],"architecture,":[31],"pretraining":[32,113],"data,":[33],"objective,":[34],"or":[35],"random":[36],"seed":[37],"can":[38],"yield":[39],"embeddings":[40,105],"with":[41,106,193],"similar":[42],"content":[43],"but":[44],"incompatible":[45],"geometry.":[46],"This":[47],"latent":[48,89,133,154,203],"space":[49],"alignment":[50,151,206],"problem":[51],"central":[53],"to":[54,182],"interpretability,":[55],"transfer":[56,175],"and":[57,62,75,115,139,145,159,178,184,208,212],"multimodal":[58],"learning,":[59,176],"federated":[60],"systems,":[61],"communication;":[64],"however,":[65],"progress":[66],"limited":[68],"the":[70,123,128],"lack":[71],"of":[72,88,122,131,170,187],"large-scale,":[73],"model-diverse,":[74],"metadata-rich":[76],"benchmarks.":[77,102],"To":[78],"address":[79],"this":[80],"gap,":[81],"we":[82,126,148,164],"introduce":[83],"SEMASIA,":[84],"a":[85,166,198],"large-scale":[86,167],"collection":[87],"extracted":[91],"from":[92],"approximately":[93],"1,700":[94],"pretrained":[95],"vision":[96],"eight":[99],"standard":[100],"image-classification":[101],"SEMASIA":[103,196],"pairs":[104],"structured":[107],"metadata":[108],"describing":[109],"architectures,":[110],"training":[111],"regimes,":[112],"sources,":[114],"model":[116,179],"scale.":[117],"We":[118],"demonstrate":[119],"three":[120],"applications":[121],"resource.":[124],"First,":[125],"analyze":[127],"conceptual":[129],"organization":[130],"individual":[132],"spaces,":[134],"showing":[135],"consistent":[136],"prototype-like":[137],"clustering":[138],"hierarchical":[140],"neighborhoods":[142],"datasets.":[146],"Second,":[147],"benchmark":[149],"supervised":[150],"mappings":[152],"between":[153],"using":[156],"reconstruction":[157],"error":[158],"downstream":[160],"task":[161],"performance.":[162],"Third,":[163],"perform":[165],"regression":[168],"analysis":[169],"how":[171],"pretraining-data":[172],"complexity,":[173],"specialization,":[174],"augmentation,":[177],"scale":[180,192],"relate":[181],"probing":[185],"properties":[186],"embeddings.":[188],"By":[189],"coupling":[190],"representational":[191],"standardized":[194],"metadata,":[195],"provides":[197],"reproducible":[199],"foundation":[200],"for":[201],"studying":[202],"geometry,":[204],"evaluating":[205],"methods,":[207],"developing":[209],"next-generation":[210],"heterogeneous":[211],"interoperable":[213],"AI":[214],"systems.":[215]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-13T00:00:00"}
