{"id":"https://openalex.org/W7162540947","doi":"https://doi.org/10.1109/3dv69130.2026.00037","title":"SAIL-Recon: Large SfM by Augmenting Scene Regression with Localization","display_name":"SAIL-Recon: Large SfM by Augmenting Scene Regression with Localization","publication_year":2026,"publication_date":"2026-03-20","ids":{"openalex":"https://openalex.org/W7162540947","doi":"https://doi.org/10.1109/3dv69130.2026.00037"},"language":null,"primary_location":{"id":"doi:10.1109/3dv69130.2026.00037","is_oa":false,"landing_page_url":"https://doi.org/10.1109/3dv69130.2026.00037","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 International Conference on 3D Vision (3DV)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072294820","display_name":"Junyuan Deng","orcid":"https://orcid.org/0000-0001-7331-0235"},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Junyuan Deng","raw_affiliation_strings":["The Hong Kong University of Science and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology","institution_ids":["https://openalex.org/I200769079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077753068","display_name":"H L Li","orcid":null},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Heng Li","raw_affiliation_strings":["The Hong Kong University of Science and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology","institution_ids":["https://openalex.org/I200769079"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137121610","display_name":"Tao Xie","orcid":null},"institutions":[{"id":"https://openalex.org/I4401726824","display_name":"Horizon Robotics (China)","ror":"https://ror.org/05cmv6g68","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726824"]}],"countries":[],"is_corresponding":false,"raw_author_name":"Tao Xie","raw_affiliation_strings":["Horizon Robotics"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Horizon Robotics","institution_ids":["https://openalex.org/I4401726824"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137120636","display_name":"Weiqiang Ren","orcid":null},"institutions":[{"id":"https://openalex.org/I4401726824","display_name":"Horizon Robotics (China)","ror":"https://ror.org/05cmv6g68","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726824"]}],"countries":[],"is_corresponding":false,"raw_author_name":"Weiqiang Ren","raw_affiliation_strings":["Horizon Robotics"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Horizon Robotics","institution_ids":["https://openalex.org/I4401726824"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137191716","display_name":"Qian Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I4401726824","display_name":"Horizon Robotics (China)","ror":"https://ror.org/05cmv6g68","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726824"]}],"countries":[],"is_corresponding":false,"raw_author_name":"Qian Zhang","raw_affiliation_strings":["Horizon Robotics"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Horizon Robotics","institution_ids":["https://openalex.org/I4401726824"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137117320","display_name":"Ping Tan","orcid":null},"institutions":[{"id":"https://openalex.org/I200769079","display_name":"Hong Kong University of Science and Technology","ror":"https://ror.org/00q4vv597","country_code":"HK","type":"education","lineage":["https://openalex.org/I200769079"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Ping Tan","raw_affiliation_strings":["The Hong Kong University of Science and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The Hong Kong University of Science and Technology","institution_ids":["https://openalex.org/I200769079"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5137120797","display_name":"Xiaoyang Guo","orcid":null},"institutions":[{"id":"https://openalex.org/I4401726824","display_name":"Horizon Robotics (China)","ror":"https://ror.org/05cmv6g68","country_code":null,"type":"company","lineage":["https://openalex.org/I4401726824"]}],"countries":[],"is_corresponding":false,"raw_author_name":"Xiaoyang Guo","raw_affiliation_strings":["Horizon Robotics"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Horizon Robotics","institution_ids":["https://openalex.org/I4401726824"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.85423769,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"309","last_page":"329"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.08760000020265579,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.08760000020265579,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.08009999990463257,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.07940000295639038,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/regression","display_name":"Regression","score":0.3865000009536743},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.37869998812675476},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.3057999908924103},{"id":"https://openalex.org/keywords/linear-regression","display_name":"Linear regression","score":0.3057999908924103},{"id":"https://openalex.org/keywords/regression-analysis","display_name":"Regression analysis","score":0.299699991941452}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5952000021934509},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5519000291824341},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.507099986076355},{"id":"https://openalex.org/C83546350","wikidata":"https://www.wikidata.org/wiki/Q1139051","display_name":"Regression","level":2,"score":0.3865000009536743},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.37869998812675476},{"id":"https://openalex.org/C48921125","wikidata":"https://www.wikidata.org/wiki/Q10861030","display_name":"Linear regression","level":2,"score":0.3057999908924103},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.3057999908924103},{"id":"https://openalex.org/C152877465","wikidata":"https://www.wikidata.org/wiki/Q208042","display_name":"Regression analysis","level":2,"score":0.299699991941452},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.2685000002384186},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.2676999866962433},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.25690001249313354}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/3dv69130.2026.00037","is_oa":false,"landing_page_url":"https://doi.org/10.1109/3dv69130.2026.00037","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2026 International Conference on 3D Vision (3DV)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6475468873977661,"display_name":"Climate action","id":"https://metadata.un.org/sdg/13"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":82,"referenced_works":["https://openalex.org/W1989476314","https://openalex.org/W2021851106","https://openalex.org/W2097649661","https://openalex.org/W2107181183","https://openalex.org/W2112634643","https://openalex.org/W2124313187","https://openalex.org/W2163446794","https://openalex.org/W2199898507","https://openalex.org/W2200124539","https://openalex.org/W2471962767","https://openalex.org/W2556455135","https://openalex.org/W2728498859","https://openalex.org/W2738551266","https://openalex.org/W2781228439","https://openalex.org/W2892865870","https://openalex.org/W2962705366","https://openalex.org/W2962793285","https://openalex.org/W2963760790","https://openalex.org/W2964153986","https://openalex.org/W2964175348","https://openalex.org/W2979458572","https://openalex.org/W2982101479","https://openalex.org/W2983230029","https://openalex.org/W3000674297","https://openalex.org/W3034275286","https://openalex.org/W3035257660","https://openalex.org/W3043075211","https://openalex.org/W3043971245","https://openalex.org/W3097660860","https://openalex.org/W3097708177","https://openalex.org/W3110153602","https://openalex.org/W3141835154","https://openalex.org/W3166285241","https://openalex.org/W3175531575","https://openalex.org/W3176602998","https://openalex.org/W3193951565","https://openalex.org/W3194532238","https://openalex.org/W3196466825","https://openalex.org/W3203570626","https://openalex.org/W4200150166","https://openalex.org/W4214520160","https://openalex.org/W4214564845","https://openalex.org/W4214768561","https://openalex.org/W4226136443","https://openalex.org/W4226399456","https://openalex.org/W4256017923","https://openalex.org/W4281493828","https://openalex.org/W4312532612","https://openalex.org/W4312581984","https://openalex.org/W4378450585","https://openalex.org/W4385245566","https://openalex.org/W4385318467","https://openalex.org/W4386066457","https://openalex.org/W4386071582","https://openalex.org/W4386071629","https://openalex.org/W4390187496","https://openalex.org/W4390872507","https://openalex.org/W4390872919","https://openalex.org/W4390872990","https://openalex.org/W4390873101","https://openalex.org/W4390874416","https://openalex.org/W4399563713","https://openalex.org/W4402704622","https://openalex.org/W4402727698","https://openalex.org/W4402753851","https://openalex.org/W4402775760","https://openalex.org/W4402816534","https://openalex.org/W4403562222","https://openalex.org/W4403877825","https://openalex.org/W4404002617","https://openalex.org/W4404198699","https://openalex.org/W4413146238","https://openalex.org/W4413147031","https://openalex.org/W4413147040","https://openalex.org/W4413147177","https://openalex.org/W4413147688","https://openalex.org/W4413155739","https://openalex.org/W4413156496","https://openalex.org/W4413157191","https://openalex.org/W4413556455","https://openalex.org/W4413559025","https://openalex.org/W7160165506"],"related_works":[],"abstract_inverted_index":{"Scene":[0],"regression":[1,65,88],"methods,":[2],"such":[3],"as":[4],"VGGT":[5],"[86],":[6],"solve":[7],"the":[8,63],"Structure-from-Motion":[9],"(SfM)":[10],"problem":[11],"by":[12,61],"directly":[13],"regressing":[14],"camera":[15,124],"poses":[16],"and":[17,127,135,140],"3D":[18],"scene":[19,64,78,102],"structures":[20],"from":[21,81],"input":[22,45,96],"images.":[23,46,86],"They":[24],"demonstrate":[25],"impressive":[26],"performance":[27],"in":[28],"handling":[29],"images":[30,97],"under":[31],"extreme":[32],"viewpoint":[33],"changes.":[34],"However,":[35],"these":[36],"methods":[37],"struggle":[38],"to":[39,93,114],"handle":[40],"a":[41,54,76,82],"large":[42,58],"number":[43],"of":[44,84],"To":[47],"address":[48],"this":[49,100],"problem,":[50],"we":[51],"introduce":[52],"SAIL-Recon,":[53],"feed-forward":[55],"Transformer":[56],"for":[57],"scale":[59],"SfM,":[60],"augmenting":[62],"network":[66,89],"with":[67],"visual":[68],"localization":[69],"capabilities.":[70],"Specifically,":[71],"our":[72,108],"method":[73,109],"first":[74],"computes":[75],"neural":[77,101],"representation":[79],"tokens":[80],"subset":[83],"anchor":[85],"The":[87],"is":[90],"then":[91],"fine-tuned":[92],"reconstruct":[94],"all":[95],"conditioned":[98],"on":[99,122],"representation.":[103],"Comprehensive":[104],"experiments":[105],"show":[106],"that":[107],"not":[110],"only":[111],"scales":[112],"efficiently":[113],"large-scale":[115],"scenes,":[116],"but":[117],"also":[118],"achieves":[119],"state-of-the-art":[120],"results":[121],"both":[123],"pose":[125],"estimation":[126],"novel":[128],"view":[129],"synthesis":[130],"benchmarks,":[131],"including":[132],"TUM-RGBD,":[133],"CO3Dv2,":[134],"Tanks":[136],"&":[137],"Temples.":[138],"Code":[139],"models":[141],"are":[142],"publicly":[143],"available":[144],"here.":[145]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-28T00:00:00"}
