{"id":"https://openalex.org/W4416359300","doi":"https://doi.org/10.48550/arxiv.2511.12653","title":"DPVO-QAT++: Heterogeneous QAT and CUDA Kernel Fusion for High-Performance Deep Patch Visual Odometry","display_name":"DPVO-QAT++: Heterogeneous QAT and CUDA Kernel Fusion for High-Performance Deep Patch Visual Odometry","publication_year":2025,"publication_date":"2025-11-16","ids":{"openalex":"https://openalex.org/W4416359300","doi":"https://doi.org/10.48550/arxiv.2511.12653"},"language":null,"primary_location":{"id":"pmh:oai:arXiv.org:2511.12653","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2511.12653","pdf_url":"https://arxiv.org/pdf/2511.12653","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2511.12653","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100724400","display_name":"Cheng Liao","orcid":"https://orcid.org/0000-0002-9469-030X"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Liao, Cheng","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5100724400"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.9186000227928162,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.9186000227928162,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.029400000348687172,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.00570000009611249,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.6960999965667725},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.5789999961853027},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.5351999998092651},{"id":"https://openalex.org/keywords/memory-footprint","display_name":"Memory footprint","score":0.48420000076293945},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.45969998836517334},{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.4553000032901764},{"id":"https://openalex.org/keywords/visual-odometry","display_name":"Visual odometry","score":0.4041000008583069},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.4032000005245209}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7409999966621399},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.6960999965667725},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6061000227928162},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.5789999961853027},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.5351999998092651},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5166000127792358},{"id":"https://openalex.org/C74912251","wikidata":"https://www.wikidata.org/wiki/Q6815727","display_name":"Memory footprint","level":2,"score":0.48420000076293945},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.45969998836517334},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.4553000032901764},{"id":"https://openalex.org/C5799516","wikidata":"https://www.wikidata.org/wiki/Q4110915","display_name":"Visual odometry","level":3,"score":0.4041000008583069},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.4032000005245209},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.39730000495910645},{"id":"https://openalex.org/C49441653","wikidata":"https://www.wikidata.org/wiki/Q2014717","display_name":"Odometry","level":4,"score":0.37869998812675476},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.3693999946117401},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.3278999924659729},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.3089999854564667},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.2849999964237213},{"id":"https://openalex.org/C122280245","wikidata":"https://www.wikidata.org/wiki/Q620622","display_name":"Kernel method","level":3,"score":0.25850000977516174},{"id":"https://openalex.org/C69744172","wikidata":"https://www.wikidata.org/wiki/Q860822","display_name":"Image fusion","level":3,"score":0.25529998540878296}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2511.12653","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2511.12653","pdf_url":"https://arxiv.org/pdf/2511.12653","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2511.12653","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2511.12653","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2511.12653","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2511.12653","pdf_url":"https://arxiv.org/pdf/2511.12653","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Deep":[0,41,230],"learning-based":[1],"Visual":[2,43,59,218,232],"SLAM":[3],"(vSLAM)":[4],"systems":[5],"exhibit":[6],"exceptional":[7],"geometric":[8],"reasoning":[9],"capabilities,":[10],"yet":[11],"their":[12],"prohibitive":[13],"computational":[14],"overhead":[15],"severely":[16],"restricts":[17],"deployment":[18],"on":[19,213],"resource-constrained":[20],"autonomous":[21],"platforms.":[22,216],"This":[23],"paper":[24],"presents":[25],"a":[26,53,116,123,158,165,203],"hierarchical":[27],"quantization":[28,68,80],"optimization":[29],"framework,":[30],"DPVO-QAT++":[31,186],"(DPVO-QAT++:":[32],"Heterogeneous":[33,220],"QAT":[34],"and":[35,63,74,90,122,164,195],"CUDA":[36,82,225],"Kernel":[37,226,235],"Fusion":[38],"for":[39,57,78,199,207],"High-Performance":[40],"Patch":[42,231],"Odometry).":[44],"Through":[45],"the":[46,58,96,100,104,138,147,189,196,208],"synergistic":[47],"integration":[48],"of":[49,99,114,156,210],"learnable":[50],"scale":[51],"parameterization,":[52],"heterogeneous":[54],"precision":[55],"design":[56],"Odometry":[60],"(VO)":[61],"front-end":[62],"back-end":[64,71],"(front-end":[65],"floating-point":[66],"fake":[67,79],"with":[69],"FP16/FP32;":[70],"full":[72],"precision),":[73],"GPU-native":[75],"kernel":[76],"fusion":[77],"(custom":[81],"kernels),":[83],"our":[84,107],"framework":[85,108],"significantly":[86],"reduces":[87],"memory":[88,129,171],"footprint":[89],"increases":[91],"processing":[92],"speed":[93],"while":[94,131],"preserving":[95],"trajectory":[97,133,175],"accuracy":[98,134,176],"original":[101,139],"model.":[102],"On":[103,146],"TartanAir":[105],"dataset,":[106,149],"achieves":[109],"an":[110,152],"average":[111,153],"FPS":[112,154],"increase":[113,155],"52.1%,":[115],"29.1%":[117],"reduction":[118,125,160,167],"in":[119,126,161,168],"median":[120,162],"latency,":[121,163],"64.9%":[124],"peak":[127,169],"GPU":[128,170],"reservation,":[130,172],"maintaining":[132,173],"(ATE)":[135,177],"comparable":[136,174],"to":[137],"DPVO":[140],"model":[141],"across":[142,178],"32":[143],"validation":[144,180],"sequences.":[145,181],"EuRoC":[148],"it":[150],"realizes":[151],"30.1%,":[157],"23.1%":[159],"37.7%":[166],"11":[179],"Experimental":[182],"results":[183],"demonstrate":[184],"that":[185],"effectively":[187],"bridges":[188],"gap":[190],"between":[191],"high-precision":[192],"deep":[193],"VO":[194],"efficiency":[197],"requirements":[198],"practical":[200],"deployment,":[201],"offering":[202],"viable":[204],"engineering":[205],"paradigm":[206],"application":[209],"this":[211],"technology":[212],"real-world":[214],"embedded":[215],"Keywords:":[217],"Odometry,":[219,233],"Precision":[221],"Architecture,":[222],"Quantization-Aware":[223],"Training,":[224,229],"Fusion,":[227],"Scale-Only":[228],"GPU-Native":[234],"Fusion.":[236]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-19T00:00:00"}
