{"id":"https://openalex.org/W7123946697","doi":"https://doi.org/10.48550/arxiv.2601.06036","title":"Tree-Preconditioned Differentiable Optimization and Axioms as Layers","display_name":"Tree-Preconditioned Differentiable Optimization and Axioms as Layers","publication_year":2025,"publication_date":"2025-12-03","ids":{"openalex":"https://openalex.org/W7123946697","doi":"https://doi.org/10.48550/arxiv.2601.06036"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2601.06036","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.06036","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2601.06036","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5122970477","display_name":"Yuexin Liao","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Liao, Yuexin","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5122970477"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.7649000287055969,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.7649000287055969,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11596","display_name":"Constraint Satisfaction and Optimization","score":0.03280000016093254,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10720","display_name":"Complexity and Algorithms in Graphs","score":0.024900000542402267,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/differentiable-function","display_name":"Differentiable function","score":0.6111000180244446},{"id":"https://openalex.org/keywords/lipschitz-continuity","display_name":"Lipschitz continuity","score":0.5202999711036682},{"id":"https://openalex.org/keywords/polytope","display_name":"Polytope","score":0.5184999704360962},{"id":"https://openalex.org/keywords/jacobian-matrix-and-determinant","display_name":"Jacobian matrix and determinant","score":0.49140000343322754},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.36469998955726624},{"id":"https://openalex.org/keywords/conjugate-gradient-method","display_name":"Conjugate gradient method","score":0.3619999885559082},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.35929998755455017},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.3562999963760376},{"id":"https://openalex.org/keywords/axiom","display_name":"Axiom","score":0.35429999232292175}],"concepts":[{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.6507999897003174},{"id":"https://openalex.org/C202615002","wikidata":"https://www.wikidata.org/wiki/Q783507","display_name":"Differentiable function","level":2,"score":0.6111000180244446},{"id":"https://openalex.org/C22324862","wikidata":"https://www.wikidata.org/wiki/Q652707","display_name":"Lipschitz continuity","level":2,"score":0.5202999711036682},{"id":"https://openalex.org/C145691206","wikidata":"https://www.wikidata.org/wiki/Q747980","display_name":"Polytope","level":2,"score":0.5184999704360962},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4982999861240387},{"id":"https://openalex.org/C200331156","wikidata":"https://www.wikidata.org/wiki/Q506041","display_name":"Jacobian matrix and determinant","level":2,"score":0.49140000343322754},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.36469998955726624},{"id":"https://openalex.org/C81184566","wikidata":"https://www.wikidata.org/wiki/Q1191895","display_name":"Conjugate gradient method","level":2,"score":0.3619999885559082},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.35929998755455017},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.3562999963760376},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.3549000024795532},{"id":"https://openalex.org/C167729594","wikidata":"https://www.wikidata.org/wiki/Q17736","display_name":"Axiom","level":2,"score":0.35429999232292175},{"id":"https://openalex.org/C158968445","wikidata":"https://www.wikidata.org/wiki/Q7631150","display_name":"Subgradient method","level":2,"score":0.349700003862381},{"id":"https://openalex.org/C167431342","wikidata":"https://www.wikidata.org/wiki/Q1754327","display_name":"Preconditioner","level":3,"score":0.33869999647140503},{"id":"https://openalex.org/C19768560","wikidata":"https://www.wikidata.org/wiki/Q320727","display_name":"Dependency (UML)","level":2,"score":0.32670000195503235},{"id":"https://openalex.org/C2011187","wikidata":"https://www.wikidata.org/wiki/Q383851","display_name":"Directional derivative","level":2,"score":0.32589998841285706},{"id":"https://openalex.org/C38349280","wikidata":"https://www.wikidata.org/wiki/Q1434290","display_name":"Flow (mathematics)","level":2,"score":0.32409998774528503},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.31360000371932983},{"id":"https://openalex.org/C203616005","wikidata":"https://www.wikidata.org/wiki/Q620495","display_name":"Hessian matrix","level":2,"score":0.311599999666214},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.3077999949455261},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.304500013589859},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.302700012922287},{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.2928999960422516},{"id":"https://openalex.org/C57493831","wikidata":"https://www.wikidata.org/wiki/Q3134666","display_name":"Projection (relational algebra)","level":2,"score":0.27239999175071716},{"id":"https://openalex.org/C128107574","wikidata":"https://www.wikidata.org/wiki/Q182003","display_name":"Injective function","level":2,"score":0.2703999876976013},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.26989999413490295},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.2694000005722046},{"id":"https://openalex.org/C129782007","wikidata":"https://www.wikidata.org/wiki/Q162886","display_name":"Euclidean geometry","level":2,"score":0.2685999870300293},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.25619998574256897},{"id":"https://openalex.org/C41045048","wikidata":"https://www.wikidata.org/wiki/Q202843","display_name":"Linear programming","level":2,"score":0.25209999084472656}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2601.06036","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.06036","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2601.06036","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.06036","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Life in Land","score":0.5243046879768372,"id":"https://metadata.un.org/sdg/15"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"This":[0],"paper":[1],"introduces":[2],"a":[3,54,101],"differentiable":[4,102],"framework":[5],"that":[6,121,135],"embeds":[7],"the":[8,27,45,62,66,73,79,98,105,110,126],"axiomatic":[9],"structure":[10],"of":[11,65,143],"Random":[12],"Utility":[13],"Models":[14],"(RUM)":[15],"directly":[16],"into":[17],"deep":[18],"neural":[19],"networks.":[20],"Although":[21],"projecting":[22],"empirical":[23],"choice":[24],"data":[25,147],"onto":[26],"RUM":[28,39],"polytope":[29],"is":[30],"NP-hard":[31],"in":[32,130],"general,":[33],"we":[34,52],"uncover":[35],"an":[36],"isomorphism":[37],"between":[38],"consistency":[40],"and":[41,87,141],"flow":[42],"conservation":[43],"on":[44],"Boolean":[46],"lattice.":[47],"Leveraging":[48],"this":[49,122],"combinatorial":[50],"structure,":[51],"derive":[53],"novel":[55],"Tree-Preconditioned":[56],"Conjugate":[57],"Gradient":[58],"solver.":[59],"By":[60],"exploiting":[61],"spanning":[63],"tree":[64],"constraint":[67],"graph,":[68],"our":[69],"preconditioner":[70],"effectively":[71],"\"whitens\"":[72],"ill-conditioned":[74],"Hessian":[75],"spectrum":[76],"induced":[77],"by":[78],"Interior":[80],"Point":[81],"Method":[82],"barrier,":[83],"achieving":[84],"superlinear":[85],"convergence":[86],"scaling":[88],"to":[89],"problem":[90],"sizes":[91],"previously":[92],"deemed":[93],"unsolvable.":[94],"We":[95],"further":[96],"formulate":[97],"projection":[99],"as":[100],"layer":[103],"via":[104],"Implicit":[106],"Function":[107],"Theorem,":[108],"where":[109,149],"exact":[111],"Jacobian":[112],"propagates":[113],"geometric":[114],"constraints":[115],"during":[116],"backpropagation.":[117],"Empirical":[118],"results":[119],"demonstrate":[120],"\"Axioms-as-Layers\"":[123],"paradigm":[124],"eliminates":[125],"structural":[127],"overfitting":[128],"inherent":[129],"penalty-based":[131],"methods,":[132],"enabling":[133],"models":[134],"are":[136],"jointly":[137],"trainable,":[138],"provably":[139],"rational,":[140],"capable":[142],"generalizing":[144],"from":[145],"sparse":[146],"regimes":[148],"standard":[150],"approximations":[151],"fail.":[152]},"counts_by_year":[],"updated_date":"2026-01-14T23:44:37.837170","created_date":"2026-01-14T00:00:00"}
