{"id":"https://openalex.org/W7127887711","doi":"https://doi.org/10.48550/arxiv.2602.05970","title":"Inverse Depth Scaling From Most Layers Being Similar","display_name":"Inverse Depth Scaling From Most Layers Being Similar","publication_year":2026,"publication_date":"2026-02-05","ids":{"openalex":"https://openalex.org/W7127887711","doi":"https://doi.org/10.48550/arxiv.2602.05970"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.05970","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5125221652","display_name":"Yizhou Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Yizhou","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005473568","display_name":"Sara Kangaslahti","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kangaslahti, Sara","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125211121","display_name":"Ziming Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Ziming","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5125222197","display_name":"Jeff Gore","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gore, Jeff","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.1972000002861023,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.1972000002861023,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.11680000275373459,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.10339999943971634,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.7659000158309937},{"id":"https://openalex.org/keywords/residual","display_name":"Residual","score":0.7523000240325928},{"id":"https://openalex.org/keywords/discretization","display_name":"Discretization","score":0.6197999715805054},{"id":"https://openalex.org/keywords/inverse","display_name":"Inverse","score":0.46230000257492065},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.44040000438690186},{"id":"https://openalex.org/keywords/residual-neural-network","display_name":"Residual neural network","score":0.3628999888896942},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.3531999886035919},{"id":"https://openalex.org/keywords/limit","display_name":"Limit (mathematics)","score":0.3361999988555908}],"concepts":[{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.7659000158309937},{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.7523000240325928},{"id":"https://openalex.org/C73000952","wikidata":"https://www.wikidata.org/wiki/Q17007827","display_name":"Discretization","level":2,"score":0.6197999715805054},{"id":"https://openalex.org/C207467116","wikidata":"https://www.wikidata.org/wiki/Q4385666","display_name":"Inverse","level":2,"score":0.46230000257492065},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.45649999380111694},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.44040000438690186},{"id":"https://openalex.org/C121864883","wikidata":"https://www.wikidata.org/wiki/Q677916","display_name":"Statistical physics","level":1,"score":0.4163999855518341},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.39250001311302185},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3691999912261963},{"id":"https://openalex.org/C2944601119","wikidata":"https://www.wikidata.org/wiki/Q43744058","display_name":"Residual neural network","level":3,"score":0.3628999888896942},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3531999886035919},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.3483999967575073},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.3409000039100647},{"id":"https://openalex.org/C151201525","wikidata":"https://www.wikidata.org/wiki/Q177239","display_name":"Limit (mathematics)","level":2,"score":0.3361999988555908},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.3319000005722046},{"id":"https://openalex.org/C2988430800","wikidata":"https://www.wikidata.org/wiki/Q428971","display_name":"Scaling law","level":3,"score":0.3215000033378601},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.3140000104904175},{"id":"https://openalex.org/C167085575","wikidata":"https://www.wikidata.org/wiki/Q6803654","display_name":"Mean squared prediction error","level":2,"score":0.3003999888896942},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.28949999809265137},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.27619999647140503},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2728999853134155},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.2635999917984009},{"id":"https://openalex.org/C135252773","wikidata":"https://www.wikidata.org/wiki/Q1567213","display_name":"Inverse problem","level":2,"score":0.2628999948501587},{"id":"https://openalex.org/C19619285","wikidata":"https://www.wikidata.org/wiki/Q196372","display_name":"Observational error","level":2,"score":0.26190000772476196},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.2574000060558319},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.2563999891281128},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.2529999911785126},{"id":"https://openalex.org/C170122806","wikidata":"https://www.wikidata.org/wiki/Q1914828","display_name":"Linear scale","level":2,"score":0.2515000104904175},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.25110000371932983}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.05970","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.05970","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.05970","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.05970","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Neural":[0],"scaling":[1],"laws":[2],"relate":[3],"loss":[4,32,43],"to":[5,19,47,53,104],"model":[6],"size":[7],"in":[8,49],"large":[9],"language":[10],"models":[11],"(LLMs),":[12],"yet":[13,74],"depth":[14,30,48],"and":[15,37,76,86],"width":[16],"may":[17,77,100],"contribute":[18],"performance":[20],"differently,":[21],"requiring":[22],"more":[23],"detailed":[24],"studies.":[25],"Here,":[26],"we":[27],"quantify":[28],"how":[29],"affects":[31],"via":[33],"analysis":[34],"of":[35,83,108],"LLMs":[36],"toy":[38],"residual":[39,84],"networks.":[40],"We":[41],"find":[42],"scales":[44],"inversely":[45],"proportional":[46],"LLMs,":[50],"probably":[51],"due":[52],"functionally":[54],"similar":[55],"layers":[56],"reducing":[57],"error":[58],"through":[59],"ensemble":[60],"averaging":[61],"rather":[62],"than":[63],"compositional":[64,106],"learning":[65],"or":[66],"discretizing":[67],"smooth":[68,91],"dynamics.":[69,92],"This":[70],"regime":[71],"is":[72],"inefficient":[73],"robust":[75],"arise":[78],"from":[79],"the":[80],"architectural":[81,102],"bias":[82],"networks":[85],"target":[87],"functions":[88],"incompatible":[89],"with":[90],"The":[93],"findings":[94],"suggest":[95],"that":[96],"improving":[97],"LLM":[98],"efficiency":[99],"require":[101],"innovations":[103],"encourage":[105],"use":[107],"depth.":[109]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-02-07T00:00:00"}
