{"id":"https://openalex.org/W7157280266","doi":"https://doi.org/10.48550/arxiv.2604.24469","title":"Geometric Analysis of Self-Supervised Vision Representations for Semantic Image Retrieval","display_name":"Geometric Analysis of Self-Supervised Vision Representations for Semantic Image Retrieval","publication_year":2026,"publication_date":"2026-04-27","ids":{"openalex":"https://openalex.org/W7157280266","doi":"https://doi.org/10.48550/arxiv.2604.24469"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.24469","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.24469","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.24469","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5093489205","display_name":"Esteban Rodr\u00edguez-Betancourt","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Rodr\u00edguez-Betancourt, Esteban","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5124404772","display_name":"Edgar Casasola-Murillo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Casasola-Murillo, Edgar","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5093489205"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.6349999904632568,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.6349999904632568,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.3174000084400177,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10689","display_name":"Remote-Sensing Image Classification","score":0.00800000037997961,"subfield":{"id":"https://openalex.org/subfields/2214","display_name":"Media Technology"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.7346000075340271},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.5748000144958496},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.46000000834465027},{"id":"https://openalex.org/keywords/k-nearest-neighbors-algorithm","display_name":"k-nearest neighbors algorithm","score":0.4250999987125397},{"id":"https://openalex.org/keywords/geometric-analysis","display_name":"Geometric analysis","score":0.4219000041484833},{"id":"https://openalex.org/keywords/visual-word","display_name":"Visual Word","score":0.41499999165534973},{"id":"https://openalex.org/keywords/semantic-analysis","display_name":"Semantic analysis (machine learning)","score":0.3538999855518341},{"id":"https://openalex.org/keywords/semantic-gap","display_name":"Semantic gap","score":0.3497999906539917},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.3488999903202057}],"concepts":[{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.7346000075340271},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6535999774932861},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6197999715805054},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.5748000144958496},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.46000000834465027},{"id":"https://openalex.org/C113238511","wikidata":"https://www.wikidata.org/wiki/Q1071612","display_name":"k-nearest neighbors algorithm","level":2,"score":0.4250999987125397},{"id":"https://openalex.org/C154968394","wikidata":"https://www.wikidata.org/wiki/Q5535474","display_name":"Geometric analysis","level":5,"score":0.4219000041484833},{"id":"https://openalex.org/C189391414","wikidata":"https://www.wikidata.org/wiki/Q7936579","display_name":"Visual Word","level":4,"score":0.41499999165534973},{"id":"https://openalex.org/C2777946921","wikidata":"https://www.wikidata.org/wiki/Q7449044","display_name":"Semantic analysis (machine learning)","level":2,"score":0.3538999855518341},{"id":"https://openalex.org/C86034646","wikidata":"https://www.wikidata.org/wiki/Q474311","display_name":"Semantic gap","level":4,"score":0.3497999906539917},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3488999903202057},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.34880000352859497},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.34790000319480896},{"id":"https://openalex.org/C122342681","wikidata":"https://www.wikidata.org/wiki/Q330828","display_name":"Skewness","level":2,"score":0.34150001406669617},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.3393999934196472},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.3384999930858612},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.32600000500679016},{"id":"https://openalex.org/C89686163","wikidata":"https://www.wikidata.org/wiki/Q1187982","display_name":"Vector space model","level":2,"score":0.32359999418258667},{"id":"https://openalex.org/C2986420190","wikidata":"https://www.wikidata.org/wiki/Q39045939","display_name":"Semantic space","level":2,"score":0.314300000667572},{"id":"https://openalex.org/C8038995","wikidata":"https://www.wikidata.org/wiki/Q1152135","display_name":"Unsupervised learning","level":2,"score":0.3091999888420105},{"id":"https://openalex.org/C13336665","wikidata":"https://www.wikidata.org/wiki/Q125977","display_name":"Vector space","level":2,"score":0.29910001158714294},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.296099990606308},{"id":"https://openalex.org/C199579030","wikidata":"https://www.wikidata.org/wiki/Q2851778","display_name":"Automatic image annotation","level":4,"score":0.29280000925064087},{"id":"https://openalex.org/C170133592","wikidata":"https://www.wikidata.org/wiki/Q1806883","display_name":"Latent semantic analysis","level":2,"score":0.29170000553131104},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.28220000863075256},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.2800999879837036},{"id":"https://openalex.org/C2780052074","wikidata":"https://www.wikidata.org/wiki/Q1128648","display_name":"Content-based image retrieval","level":4,"score":0.2623000144958496},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.258899986743927},{"id":"https://openalex.org/C151876577","wikidata":"https://www.wikidata.org/wiki/Q7049464","display_name":"Nonlinear dimensionality reduction","level":3,"score":0.2558000087738037}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.24469","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.24469","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.24469","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.24469","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.49588072299957275,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Content-based":[0],"image":[1],"retrieval":[2,79,157],"(CBIR)":[3],"systems":[4],"enable":[5],"users":[6],"to":[7,154],"search":[8,26],"images":[9],"based":[10],"on":[11,17,52],"visual":[12],"content":[13],"instead":[14,50],"of":[15,27,119,150],"relying":[16,51],"metadata.":[18],"The":[19],"text":[20,60],"domain":[21],"has":[22],"benefited":[23],"from":[24],"vector":[25,83],"representations":[28,67,106,138],"created":[29],"with":[30,107,139],"unsupervised":[31],"methods":[32,40,57,73,115],"such":[33],"as":[34],"BERT.":[35],"However,":[36],"modern":[37,70,113],"self-supervised":[38,71],"learning":[39,72],"for":[41,74],"vision":[42,75],"are":[43],"mostly":[44],"not":[45,134],"reported":[46],"in":[47],"CBIR-related":[48],"literature,":[49],"supervised":[53],"models":[54],"or":[55,130],"multi-modal":[56],"that":[58,81,92],"align":[59],"and":[61,85,121,142],"vision.":[62],"We":[63],"evaluate":[64],"how":[65],"the":[66,93,117,147],"learned":[68],"by":[69,111],"perform":[76],"under":[77],"typical":[78],"stacks":[80],"leverage":[82],"databases":[84],"nearest":[86,99],"neighbor":[87,100],"search.":[88],"Our":[89],"evaluation":[90],"reveals":[91],"latent":[94],"space":[95],"geometry":[96],"impacts":[97],"approximate":[98],"(ANN)":[101],"indexing.":[102],"Specifically,":[103],"highly":[104],"anisotropic":[105],"high":[108],"skewness":[109],"produced":[110],"several":[112],"SSL":[114],"degrade":[116],"performance":[118],"partition-based":[120],"hashing-based":[122],"search,":[123],"even":[124],"if":[125],"their":[126],"own":[127],"linear":[128],"probe":[129],"K-NN":[131],"accuracy":[132],"is":[133],"affected.":[135],"In":[136],"contrast,":[137],"higher":[140],"isotropy":[141],"local":[143],"purity":[144],"better":[145],"satisfy":[146],"distance-based":[148],"assumptions":[149],"ANN":[151],"indexes,":[152],"leading":[153],"improved":[155],"semantic":[156],"performance.":[158]},"counts_by_year":[],"updated_date":"2026-04-29T06:16:36.941037","created_date":"2026-04-29T00:00:00"}
