{"id":"https://openalex.org/W7150896708","doi":"https://doi.org/10.48550/arxiv.2604.03092","title":"Flash-Mono: Feed-Forward Accelerated Gaussian Splatting Monocular SLAM","display_name":"Flash-Mono: Feed-Forward Accelerated Gaussian Splatting Monocular SLAM","publication_year":2026,"publication_date":"2026-04-03","ids":{"openalex":"https://openalex.org/W7150896708","doi":"https://doi.org/10.48550/arxiv.2604.03092"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.03092","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.03092","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.03092","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133007266","display_name":"Zicheng Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhang, Zicheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133024245","display_name":"Ke Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Ke","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100940584","display_name":"Xiangting Meng","orcid":"https://orcid.org/0009-0001-1906-1933"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Meng, Xiangting","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133006881","display_name":"Keyu Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Keyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133053039","display_name":"Jieru Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Jieru","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5102769588","display_name":"Wenchao Ding","orcid":"https://orcid.org/0000-0003-4249-526X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ding, Wenchao","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5133007266"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.7864999771118164,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.7864999771118164,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.10119999945163727,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10719","display_name":"3D Shape Modeling and Analysis","score":0.0502999983727932,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.6049000024795532},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5296000242233276},{"id":"https://openalex.org/keywords/gaussian-process","display_name":"Gaussian process","score":0.4977000057697296},{"id":"https://openalex.org/keywords/simultaneous-localization-and-mapping","display_name":"Simultaneous localization and mapping","score":0.42980000376701355},{"id":"https://openalex.org/keywords/monocular","display_name":"Monocular","score":0.4016000032424927},{"id":"https://openalex.org/keywords/thompson-sampling","display_name":"Thompson sampling","score":0.39399999380111694},{"id":"https://openalex.org/keywords/visual-hull","display_name":"Visual hull","score":0.3634999990463257},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.3587999939918518},{"id":"https://openalex.org/keywords/tracking","display_name":"Tracking (education)","score":0.3547999858856201}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7627999782562256},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6284999847412109},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.6049000024795532},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5795000195503235},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5296000242233276},{"id":"https://openalex.org/C61326573","wikidata":"https://www.wikidata.org/wiki/Q1496376","display_name":"Gaussian process","level":3,"score":0.4977000057697296},{"id":"https://openalex.org/C86369673","wikidata":"https://www.wikidata.org/wiki/Q1203659","display_name":"Simultaneous localization and mapping","level":4,"score":0.42980000376701355},{"id":"https://openalex.org/C65909025","wikidata":"https://www.wikidata.org/wiki/Q1945033","display_name":"Monocular","level":2,"score":0.4016000032424927},{"id":"https://openalex.org/C73602740","wikidata":"https://www.wikidata.org/wiki/Q7795822","display_name":"Thompson sampling","level":3,"score":0.39399999380111694},{"id":"https://openalex.org/C2776863239","wikidata":"https://www.wikidata.org/wiki/Q7936601","display_name":"Visual hull","level":3,"score":0.3634999990463257},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.3587999939918518},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.3547999858856201},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.3224000036716461},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.3057999908924103},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.29739999771118164},{"id":"https://openalex.org/C57489055","wikidata":"https://www.wikidata.org/wiki/Q190046","display_name":"Ellipsoid","level":2,"score":0.2937000095844269},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.29269999265670776},{"id":"https://openalex.org/C109950114","wikidata":"https://www.wikidata.org/wiki/Q4464732","display_name":"3D reconstruction","level":2,"score":0.28110000491142273},{"id":"https://openalex.org/C181095308","wikidata":"https://www.wikidata.org/wiki/Q1541599","display_name":"Geometric primitive","level":2,"score":0.2784000039100647},{"id":"https://openalex.org/C2987834672","wikidata":"https://www.wikidata.org/wiki/Q4677630","display_name":"Action recognition","level":3,"score":0.27399998903274536},{"id":"https://openalex.org/C65892221","wikidata":"https://www.wikidata.org/wiki/Q1113935","display_name":"Gaussian filter","level":3,"score":0.2685999870300293},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.26499998569488525},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.26409998536109924},{"id":"https://openalex.org/C51267290","wikidata":"https://www.wikidata.org/wiki/Q5527848","display_name":"Gaussian random field","level":4,"score":0.26409998536109924},{"id":"https://openalex.org/C2779662243","wikidata":"https://www.wikidata.org/wiki/Q970395","display_name":"Shape context","level":3,"score":0.26260000467300415},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.26249998807907104},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.2619999945163727},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.25940001010894775},{"id":"https://openalex.org/C166550679","wikidata":"https://www.wikidata.org/wiki/Q263400","display_name":"Gaussian network model","level":3,"score":0.2590000033378601},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.2547000050544739}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.03092","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.03092","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.03092","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.03092","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Monocular":[0],"3D":[1,176],"Gaussian":[2,47,72,110,115,177,181],"Splatting":[3,73],"SLAM":[4],"suffers":[5],"from":[6,20,32],"critical":[7],"limitations":[8],"in":[9,125,191],"time":[10],"efficiency,":[11],"geometric":[12,171],"accuracy,":[13],"and":[14,25,76,103,108,158,194,203],"multi-view":[15],"consistency.":[16],"These":[17],"issues":[18],"stem":[19],"the":[21,26,120,164],"time-consuming":[22],"$\\textit{Train-from-Scratch}$":[23],"optimization":[24,123,161],"lack":[27],"of":[28,62,138,167],"inter-frame":[29],"scale":[30],"consistency":[31],"single-frame":[33],"geometry":[34],"priors.":[35],"We":[36,56,83],"contend":[37],"that":[38,90,186],"a":[39,59,66,70,85,97,129],"feed-forward":[40,67,87],"paradigm,":[41],"leveraging":[42],"multi-frame":[43,93],"context":[44],"to":[45,162],"predict":[46],"attributes":[48],"directly,":[49],"is":[50],"crucial":[51],"for":[52,200],"addressing":[53],"these":[54],"challenges.":[55],"present":[57],"Flash-Mono,":[58],"system":[60],"composed":[61],"three":[63],"core":[64],"modules:":[65],"prediction":[68],"frontend,":[69],"2D":[71,180],"mapping":[74,195],"backend,":[75],"an":[77],"efficient":[78,144,155],"hidden-state-based":[79],"loop":[80,156],"closure":[81,157],"module.":[82],"trained":[84],"recurrent":[86,140],"frontend":[88],"model":[89],"progressively":[91],"aggregates":[92],"visual":[94],"features":[95],"into":[96],"hidden":[98,147],"state":[99],"via":[100],"cross":[101],"attention":[102],"jointly":[104],"predicts":[105],"camera":[106],"poses":[107],"per-pixel":[109],"properties.":[111],"By":[112],"directly":[113],"predicting":[114],"attributes,":[116],"our":[117,139],"method":[118],"bypasses":[119],"burdensome":[121],"per-frame":[122],"required":[124],"optimization-based":[126],"GS-SLAM,":[127],"achieving":[128],"$\\textbf{10x}$":[130],"speedup":[131],"while":[132],"ensuring":[133],"high-quality":[134],"rendering.":[135],"The":[136,146],"power":[137],"architecture":[141],"extends":[142],"beyond":[143],"prediction.":[145],"states":[148],"act":[149],"as":[150],"compact":[151],"submap":[152],"descriptors,":[153],"facilitating":[154],"global":[159],"$\\mathrm{Sim}(3)$":[160],"mitigate":[163],"long-standing":[165],"challenge":[166],"drift.":[168],"For":[169],"enhanced":[170],"fidelity,":[172],"we":[173],"replace":[174],"conventional":[175],"ellipsoids":[178],"with":[179],"surfels.":[182],"Extensive":[183],"experiments":[184],"demonstrate":[185],"Flash-Mono":[187],"achieves":[188],"state-of-the-art":[189],"performance":[190],"both":[192],"tracking":[193],"quality,":[196],"highlighting":[197],"its":[198],"potential":[199],"embodied":[201],"perception":[202],"real-time":[204],"reconstruction":[205],"applications.":[206],"Project":[207],"page:":[208],"https://victkk.github.io/flash-mono.":[209]},"counts_by_year":[],"updated_date":"2026-04-07T06:06:30.997549","created_date":"2026-04-07T00:00:00"}
