{"id":"https://openalex.org/W7160395584","doi":"https://doi.org/10.48550/arxiv.2605.03346","title":"Provable Accuracy Collapse in Embedding-Based Representations under Dimensionality Mismatch","display_name":"Provable Accuracy Collapse in Embedding-Based Representations under Dimensionality Mismatch","publication_year":2026,"publication_date":"2026-05-05","ids":{"openalex":"https://openalex.org/W7160395584","doi":"https://doi.org/10.48550/arxiv.2605.03346"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.03346","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.03346","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.03346","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5135428855","display_name":"Dionysis Arvanitakis","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Arvanitakis, Dionysis","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054031027","display_name":"Vaggos Chatziafratis","orcid":"https://orcid.org/0000-0002-4475-4504"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chatziafratis, Vaggos","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5135446096","display_name":"Yiyuan Luo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Yiyuan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9115999937057495,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9115999937057495,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.02630000002682209,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.02539999969303608,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.7926999926567078},{"id":"https://openalex.org/keywords/dimension","display_name":"Dimension (graph theory)","score":0.7911999821662903},{"id":"https://openalex.org/keywords/complement","display_name":"Complement (music)","score":0.6439999938011169},{"id":"https://openalex.org/keywords/curse-of-dimensionality","display_name":"Curse of dimensionality","score":0.5947999954223633},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4916999936103821},{"id":"https://openalex.org/keywords/intrinsic-dimension","display_name":"Intrinsic dimension","score":0.48829999566078186},{"id":"https://openalex.org/keywords/dimensionality-reduction","display_name":"Dimensionality reduction","score":0.45559999346733093},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.4124999940395355},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.4120999872684479}],"concepts":[{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.7926999926567078},{"id":"https://openalex.org/C33676613","wikidata":"https://www.wikidata.org/wiki/Q13415176","display_name":"Dimension (graph theory)","level":2,"score":0.7911999821662903},{"id":"https://openalex.org/C112313634","wikidata":"https://www.wikidata.org/wiki/Q7886648","display_name":"Complement (music)","level":5,"score":0.6439999938011169},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.6050000190734863},{"id":"https://openalex.org/C111030470","wikidata":"https://www.wikidata.org/wiki/Q1430460","display_name":"Curse of dimensionality","level":2,"score":0.5947999954223633},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5709999799728394},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4916999936103821},{"id":"https://openalex.org/C30732413","wikidata":"https://www.wikidata.org/wiki/Q17092636","display_name":"Intrinsic dimension","level":3,"score":0.48829999566078186},{"id":"https://openalex.org/C70518039","wikidata":"https://www.wikidata.org/wiki/Q16000077","display_name":"Dimensionality reduction","level":2,"score":0.45559999346733093},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.4124999940395355},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.4120999872684479},{"id":"https://openalex.org/C129782007","wikidata":"https://www.wikidata.org/wiki/Q162886","display_name":"Euclidean geometry","level":2,"score":0.4101000130176544},{"id":"https://openalex.org/C115311070","wikidata":"https://www.wikidata.org/wiki/Q5347255","display_name":"Effective dimension","level":3,"score":0.3756999969482422},{"id":"https://openalex.org/C2777027219","wikidata":"https://www.wikidata.org/wiki/Q1284190","display_name":"Constant (computer programming)","level":2,"score":0.37470000982284546},{"id":"https://openalex.org/C186450821","wikidata":"https://www.wikidata.org/wiki/Q17295","display_name":"Euclidean space","level":2,"score":0.3634999990463257},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.3513000011444092},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3237999975681305},{"id":"https://openalex.org/C120174047","wikidata":"https://www.wikidata.org/wiki/Q847073","display_name":"Euclidean distance","level":2,"score":0.3122999966144562},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.3082999885082245},{"id":"https://openalex.org/C64357122","wikidata":"https://www.wikidata.org/wiki/Q1149766","display_name":"Causality (physics)","level":2,"score":0.28940001130104065},{"id":"https://openalex.org/C529865628","wikidata":"https://www.wikidata.org/wiki/Q1790740","display_name":"Manifold (fluid mechanics)","level":2,"score":0.28690001368522644},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.2816999852657318},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.2797999978065491},{"id":"https://openalex.org/C136979486","wikidata":"https://www.wikidata.org/wiki/Q773483","display_name":"Existential quantification","level":2,"score":0.27709999680519104},{"id":"https://openalex.org/C119322782","wikidata":"https://www.wikidata.org/wiki/Q2662236","display_name":"VC dimension","level":2,"score":0.26510000228881836},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.260699987411499},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.257099986076355},{"id":"https://openalex.org/C35435516","wikidata":"https://www.wikidata.org/wiki/Q5370016","display_name":"Embedding problem","level":3,"score":0.2549999952316284}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.03346","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.03346","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.03346","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.03346","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Embedding-based":[0],"representations":[1],"in":[2,69,101,163],"Euclidean":[3],"space":[4],"$\\mathbb{R}^d$":[5],"are":[6,160],"a":[7,14,38,57,79,132],"cornerstone":[8],"of":[9,81,117,124,171],"modern":[10],"machine":[11],"learning,":[12],"where":[13,74],"major":[15],"goal":[16],"is":[17,47,76],"to":[18,50,78],"use":[19],"the":[20,43,51,125,138,151,157,179],"\\emph{smallest":[21],"dimension}":[22,173],"that":[23,64,106,114,136],"faithfully":[24],"captures":[25],"data":[26],"relations.":[27],"In":[28],"this":[29,65],"work,":[30],"we":[31,104],"prove":[32,105],"sharp":[33],"dimension--accuracy":[34],"tradeoffs":[35],"and":[36],"identify":[37],"fundamental":[39],"information-theoretic":[40,143],"limitation:":[41],"unless":[42],"embedding":[44,100,116],"dimension":[45,53,118],"$d$":[46],"chosen":[48],"close":[49],"ground-truth":[52,99],"$D$,":[54],"accuracy":[55,128,177],"undergoes":[56],"sudden":[58],"collapse.":[59],"Our":[60],"main":[61],"result":[62],"shows":[63],"phenomenon":[66],"arises":[67],"even":[68,155],"standard":[70],"contrastive":[71],"learning":[72],"settings,":[73],"supervision":[75],"limited":[77],"set":[80],"$m$":[82],"anchor--positive--negative":[83],"triplets":[84,94,159],"$(i,j,k)$":[85],"encoding":[86],"distance":[87],"comparisons":[88],"$\\mathrm{dist}(i,j)":[89],"&lt;":[90,111],"\\mathrm{dist}(i,k)$.":[91],"Specifically,":[92],"given":[93,158],"realizable":[95,162],"by":[96],"an":[97],"unknown":[98],"$D$":[102],"dimensions,":[103],"there":[107],"exists":[108],"constant":[109],"$c":[110],"1$,":[112],"such":[113],"\\emph{every":[115],"at":[119],"most":[120],"$cD$":[121],"violates":[122],"half":[123],"triplets},":[126],"yielding":[127],"as":[129,131],"low":[130],"trivial":[133,180],"one-dimensional":[134],"solution":[135],"ignores":[137],"input.":[139],"We":[140],"complement":[141],"our":[142],"bounds":[144],"with":[145],"strong":[146],"computational":[147],"hardness":[148],"results:":[149],"under":[150],"Unique":[152],"Games":[153],"Conjecture,":[154],"if":[156],"nearly":[161],"$D=1$":[164],"dimension,":[165],"no":[166],"polynomial-time":[167],"algorithm":[168],"--":[169,174],"\\textit{regardless":[170],"its":[172],"can":[175],"achieve":[176],"above":[178],"$50\\%$":[181],"baseline.":[182]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-07T00:00:00"}
