{"id":"https://openalex.org/W4382602885","doi":"https://doi.org/10.48550/arxiv.2306.14975","title":"The Underlying Scaling Laws and Universal Statistical Structure of Complex Datasets","display_name":"The Underlying Scaling Laws and Universal Statistical Structure of Complex Datasets","publication_year":2023,"publication_date":"2023-06-26","ids":{"openalex":"https://openalex.org/W4382602885","doi":"https://doi.org/10.48550/arxiv.2306.14975"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2306.14975","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2306.14975","pdf_url":"https://arxiv.org/pdf/2306.14975","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2306.14975","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5028029621","display_name":"Noam Levi","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Levi, Noam","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5005360706","display_name":"Yaron Oz","orcid":"https://orcid.org/0000-0002-1179-5668"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Oz, Yaron","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5028029621"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12261","display_name":"Statistical Mechanics and Entropy","score":0.9800999760627747,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12261","display_name":"Statistical Mechanics and Entropy","score":0.9800999760627747,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.9767000079154968,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12946","display_name":"Fractal and DNA sequence analysis","score":0.9520999789237976,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/random-matrix","display_name":"Random matrix","score":0.6915231943130493},{"id":"https://openalex.org/keywords/wishart-distribution","display_name":"Wishart distribution","score":0.6236394643783569},{"id":"https://openalex.org/keywords/covariance-matrix","display_name":"Covariance matrix","score":0.5403403043746948},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.5387014746665955},{"id":"https://openalex.org/keywords/covariance","display_name":"Covariance","score":0.5322824716567993},{"id":"https://openalex.org/keywords/eigenvalues-and-eigenvectors","display_name":"Eigenvalues and eigenvectors","score":0.5030936598777771},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.46674734354019165},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.45482945442199707},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.4518534243106842},{"id":"https://openalex.org/keywords/scatter-matrix","display_name":"Scatter matrix","score":0.43277212977409363},{"id":"https://openalex.org/keywords/universality","display_name":"Universality (dynamical systems)","score":0.4237672686576843},{"id":"https://openalex.org/keywords/statistical-physics","display_name":"Statistical physics","score":0.4183708429336548},{"id":"https://openalex.org/keywords/estimation-of-covariance-matrices","display_name":"Estimation of covariance matrices","score":0.3715026378631592},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3211931884288788},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3082565665245056},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.25481706857681274},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.2426154613494873},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.10089591145515442}],"concepts":[{"id":"https://openalex.org/C64812099","wikidata":"https://www.wikidata.org/wiki/Q176604","display_name":"Random matrix","level":3,"score":0.6915231943130493},{"id":"https://openalex.org/C33962027","wikidata":"https://www.wikidata.org/wiki/Q1930697","display_name":"Wishart distribution","level":3,"score":0.6236394643783569},{"id":"https://openalex.org/C185142706","wikidata":"https://www.wikidata.org/wiki/Q1134404","display_name":"Covariance matrix","level":2,"score":0.5403403043746948},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.5387014746665955},{"id":"https://openalex.org/C178650346","wikidata":"https://www.wikidata.org/wiki/Q201984","display_name":"Covariance","level":2,"score":0.5322824716567993},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.5030936598777771},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.46674734354019165},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.45482945442199707},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.4518534243106842},{"id":"https://openalex.org/C176917957","wikidata":"https://www.wikidata.org/wiki/Q7430596","display_name":"Scatter matrix","level":4,"score":0.43277212977409363},{"id":"https://openalex.org/C183992945","wikidata":"https://www.wikidata.org/wiki/Q2495574","display_name":"Universality (dynamical systems)","level":2,"score":0.4237672686576843},{"id":"https://openalex.org/C121864883","wikidata":"https://www.wikidata.org/wiki/Q677916","display_name":"Statistical physics","level":1,"score":0.4183708429336548},{"id":"https://openalex.org/C180877172","wikidata":"https://www.wikidata.org/wiki/Q5401390","display_name":"Estimation of covariance matrices","level":3,"score":0.3715026378631592},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3211931884288788},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3082565665245056},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.25481706857681274},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.2426154613494873},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.10089591145515442},{"id":"https://openalex.org/C161584116","wikidata":"https://www.wikidata.org/wiki/Q1952580","display_name":"Multivariate statistics","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2306.14975","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2306.14975","pdf_url":"https://arxiv.org/pdf/2306.14975","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2306.14975","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2306.14975","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2306.14975","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2306.14975","pdf_url":"https://arxiv.org/pdf/2306.14975","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.550000011920929}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4382602885.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2955292144","https://openalex.org/W1557964549","https://openalex.org/W3099713672","https://openalex.org/W2011956837","https://openalex.org/W3134381438","https://openalex.org/W1981092891","https://openalex.org/W3048329156","https://openalex.org/W2364482911","https://openalex.org/W2782670000","https://openalex.org/W2082885816"],"abstract_inverted_index":{"We":[0,44],"study":[1],"universal":[2],"traits":[3],"which":[4,239],"emerge":[5],"both":[6,52,103],"in":[7,14,109,181],"real-world":[8,84,106,146],"complex":[9],"datasets,":[10],"as":[11,13,118],"well":[12,215],"artificially":[15],"generated":[16,104],"ones.":[17],"Our":[18,59],"approach":[19],"is":[20,169,178],"to":[21,24,39,83,152,158,186,193,230],"analogize":[22],"data":[23,81,97,243],"a":[25,218,223],"physical":[26],"system":[27],"and":[28,34,55,105,148,175,189,237],"employ":[29],"tools":[30],"from":[31,114],"statistical":[32,128],"physics":[33],"Random":[35],"Matrix":[36],"Theory":[37],"(RMT)":[38],"reveal":[40],"their":[41],"underlying":[42],"structure.":[43],"focus":[45],"on":[46,241],"the":[47,68,110,115,125,153,160,166,195,206,228,242],"feature-feature":[48],"covariance":[49,134,225],"matrix,":[50],"analyzing":[51],"its":[53,71],"local":[54,172],"global":[56],"eigenvalue":[57],"statistics.":[58],"main":[60],"observations":[61],"are:":[62],"(i)":[63],"The":[64],"power-law":[65,162],"scalings":[66],"that":[67,201],"bulk":[69],"of":[70,155,209,233],"eigenvalues":[72,176],"exhibit":[73],"are":[74],"vastly":[75],"different":[76],"for":[77,132,145],"uncorrelated":[78,187],"normally":[79],"distributed":[80],"compared":[82,185],"data,":[85],"(ii)":[86],"this":[87],"scaling":[88,163],"behavior":[89,129],"can":[90,149,213],"be":[91,150,214],"completely":[92],"modeled":[93],"by":[94,217],"generating":[95],"Gaussian":[96],"with":[98,171,202,222],"long":[99],"range":[100],"correlations,":[101],"(iii)":[102],"datasets":[107,184,212],"lie":[108],"same":[111],"universality":[112],"class":[113],"RMT":[116,127,173],"perspective,":[117],"chaotic":[119],"rather":[120],"than":[121,141],"integrable":[122],"systems,":[123],"(iv)":[124],"expected":[126],"already":[130],"manifests":[131],"empirical":[133],"matrices":[135],"at":[136],"dataset":[137],"sizes":[138],"significantly":[139],"smaller":[140,180],"those":[142],"conventionally":[143],"used":[144],"training,":[147],"related":[151],"number":[154],"samples":[156,192],"required":[157],"approximate":[159],"population":[161],"behavior,":[164],"(v)":[165],"Shannon":[167],"entropy":[168],"correlated":[170,183],"structure":[174],"scaling,":[177],"substantially":[179],"strongly":[182],"ones,":[188],"requires":[190],"fewer":[191],"reach":[194],"distribution":[196],"entropy.":[197],"These":[198],"findings":[199],"show":[200],"sufficient":[203],"sample":[204],"size,":[205],"Gram":[207,244],"matrix":[208,221],"natural":[210],"image":[211],"approximated":[216],"Wishart":[219],"random":[220],"simple":[224],"structure,":[226],"opening":[227],"door":[229],"rigorous":[231],"studies":[232],"neural":[234],"network":[235],"dynamics":[236],"generalization":[238],"rely":[240],"matrix.":[245]},"counts_by_year":[{"year":2023,"cited_by_count":1}],"updated_date":"2026-03-14T08:43:22.919905","created_date":"2025-10-10T00:00:00"}
