{"id":"https://openalex.org/W7154753042","doi":"https://doi.org/10.48550/arxiv.2604.14769","title":"Constraint-based Pre-training: From Structured Constraints to Scalable Model Initialization","display_name":"Constraint-based Pre-training: From Structured Constraints to Scalable Model Initialization","publication_year":2026,"publication_date":"2026-04-16","ids":{"openalex":"https://openalex.org/W7154753042","doi":"https://doi.org/10.48550/arxiv.2604.14769"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.14769","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.14769","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.14769","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133833211","display_name":"Fu Feng","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Feng, Fu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133841834","display_name":"Yucheng Xie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xie, Yucheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133877013","display_name":"Ruixiao Shi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Ruixiao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133876778","display_name":"Jing Wang","orcid":"https://orcid.org/0000-0001-7463-6894"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Jing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133838065","display_name":"Xin Geng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Geng, Xin","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5133833211"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.650600016117096,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.650600016117096,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.0544000007212162,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.04969999939203262,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/initialization","display_name":"Initialization","score":0.8511999845504761},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6302000284194946},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.5630999803543091},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.44519999623298645},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.44190001487731934},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.4293999969959259}],"concepts":[{"id":"https://openalex.org/C114466953","wikidata":"https://www.wikidata.org/wiki/Q6034165","display_name":"Initialization","level":2,"score":0.8511999845504761},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7495999932289124},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6302000284194946},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.5630999803543091},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.44519999623298645},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.44190001487731934},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.4293999969959259},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39079999923706055},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.38089999556541443},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.3758000135421753},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3513000011444092},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3310000002384186},{"id":"https://openalex.org/C87619178","wikidata":"https://www.wikidata.org/wiki/Q126002","display_name":"Concatenation (mathematics)","level":2,"score":0.3215999901294708},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.30809998512268066},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.2856000065803528},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.27239999175071716},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2619999945163727},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.25049999356269836}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.14769","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.14769","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.14769","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.14769","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"pre-training":[1,15,53,60,101],"and":[2,115,137,152,165,173,182,192,199],"fine-tuning":[3],"paradigm":[4,54],"has":[5],"become":[6],"the":[7,100,150],"dominant":[8],"approach":[9],"for":[10],"model":[11,37,80,104,141],"adaptation.":[12],"However,":[13],"conventional":[14],"typically":[16],"yields":[17],"models":[18,28,161],"at":[19],"a":[20,83,168],"fixed":[21],"scale,":[22],"whereas":[23],"practical":[24],"deployment":[25],"often":[26],"requires":[27],"of":[29,110,140,154,171],"varying":[30,163],"sizes,":[31],"exposing":[32],"its":[33,186],"limitations":[34],"when":[35],"target":[36],"scales":[38],"differ":[39],"from":[40,130],"those":[41],"used":[42],"during":[43,59],"pre-training.":[44],"To":[45],"address":[46],"this,":[47],"we":[48,90],"propose":[49],"an":[50],"innovative":[51],"constraint-based":[52],"that":[55],"imposes":[56],"structured":[57],"constraints":[58,97],"to":[61,73,98,189],"disentangle":[62],"size-agnostic":[63],"knowledge":[64],"into":[65],"reusable":[66],"weight":[67,75,111,124],"templates,":[68],"while":[69],"assigning":[70],"size-specific":[71],"adaptation":[72,85],"lightweight":[74,123],"scalers,":[76],"thereby":[77],"reformulating":[78],"variable-sized":[79],"initialization":[81],"as":[82,108],"multi-task":[84],"problem.":[86],"Within":[87],"this":[88],"paradigm,":[89],"further":[91],"introduce":[92],"WeiT,":[93,155],"which":[94],"employs":[95],"Kronecker-based":[96],"regularize":[99],"process.":[102],"Specifically,":[103],"parameters":[105,127],"are":[106,128],"represented":[107],"compositions":[109],"templates":[112],"via":[113],"concatenation":[114],"weighted":[116],"aggregation,":[117],"with":[118,162],"adaptive":[119],"connections":[120],"governed":[121],"by":[122],"scalers":[125],"whose":[126],"learned":[129],"limited":[131],"data.":[132],"This":[133],"design":[134],"enables":[135],"flexible":[136],"efficient":[138],"construction":[139],"weights":[142],"across":[143,167],"diverse":[144],"downstream":[145],"scales.":[146],"Extensive":[147],"experiments":[148],"demonstrate":[149],"efficiency":[151],"effectiveness":[153,187],"achieving":[156],"state-of-the-art":[157],"performance":[158,201],"in":[159],"initializing":[160],"depths":[164],"widths":[166],"broad":[169],"range":[170],"perception":[172],"embodied":[174],"learning":[175],"tasks,":[176],"including":[177],"Image":[178,180],"Classification,":[179],"Generation,":[181],"Embodied":[183],"Control.":[184],"Moreover,":[185],"generalizes":[188],"both":[190],"Transformer-based":[191],"Convolution-based":[193],"architectures,":[194],"consistently":[195],"enabling":[196],"faster":[197],"convergence":[198],"improved":[200],"even":[202],"under":[203],"full":[204],"training.":[205]},"counts_by_year":[],"updated_date":"2026-04-18T06:05:20.339008","created_date":"2026-04-18T00:00:00"}
