{"id":"https://openalex.org/W7155429310","doi":"https://doi.org/10.48550/arxiv.2604.20393","title":"MLG-Stereo: ViT Based Stereo Matching with Multi-Stage Local-Global Enhancement","display_name":"MLG-Stereo: ViT Based Stereo Matching with Multi-Stage Local-Global Enhancement","publication_year":2026,"publication_date":"2026-04-22","ids":{"openalex":"https://openalex.org/W7155429310","doi":"https://doi.org/10.48550/arxiv.2604.20393"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.20393","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.20393","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.20393","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134364850","display_name":"Haoyu Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhang, Haoyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134460668","display_name":"Jingyi Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Jingyi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134431420","display_name":"Peng Ye","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ye, Peng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134404726","display_name":"Jiakang Yuan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuan, Jiakang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134435528","display_name":"Lin Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Lin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134451255","display_name":"Feng Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Feng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5134367771","display_name":"Tao Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Tao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5134364850"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.98089998960495,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.98089998960495,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11105","display_name":"Advanced Image Processing Techniques","score":0.003700000001117587,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11019","display_name":"Image Enhancement Techniques","score":0.0026000000070780516,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6912000179290771},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.5306000113487244},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.47209998965263367},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4659999907016754},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.4408999979496002},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4404999911785126},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.41339999437332153},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.3971000015735626}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7601000070571899},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6917999982833862},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6912000179290771},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5554999709129333},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.5306000113487244},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.47209998965263367},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4659999907016754},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.4408999979496002},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4404999911785126},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.41339999437332153},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.3971000015735626},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.3903000056743622},{"id":"https://openalex.org/C65909025","wikidata":"https://www.wikidata.org/wiki/Q1945033","display_name":"Monocular","level":2,"score":0.3799999952316284},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.3686000108718872},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.34439998865127563},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.3248000144958496},{"id":"https://openalex.org/C205372480","wikidata":"https://www.wikidata.org/wiki/Q210521","display_name":"Image resolution","level":2,"score":0.3199999928474426},{"id":"https://openalex.org/C68537008","wikidata":"https://www.wikidata.org/wiki/Q247932","display_name":"Stereopsis","level":2,"score":0.2978000044822693},{"id":"https://openalex.org/C2983787585","wikidata":"https://www.wikidata.org/wiki/Q93586","display_name":"Feature matching","level":3,"score":0.28610000014305115},{"id":"https://openalex.org/C64754055","wikidata":"https://www.wikidata.org/wiki/Q7574053","display_name":"Spatial contextual awareness","level":2,"score":0.2669000029563904},{"id":"https://openalex.org/C164752517","wikidata":"https://www.wikidata.org/wiki/Q5570875","display_name":"Global optimization","level":2,"score":0.26350000500679016}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.20393","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.20393","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.20393","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.20393","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.4004349708557129,"id":"https://metadata.un.org/sdg/17","display_name":"Partnerships for the goals"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"With":[0],"the":[1,25,40,76,106,139,143,165,180],"development":[2],"of":[3,27,37,42,57,101,145],"deep":[4],"learning,":[5],"ViT-based":[6,43],"stereo":[7],"matching":[8,126],"methods":[9,44],"have":[10],"made":[11],"significant":[12],"progress":[13],"due":[14,23],"to":[15,24,45,86,120,136,171],"their":[16,34],"remarkable":[17],"robustness":[18],"and":[19,33,48,91,104,110,124,167,175],"zero-shot":[20],"ability.":[21],"However,":[22],"limitations":[26],"ViTs":[28],"in":[29,179],"handling":[30],"resolution":[31,103],"sensitivity":[32],"relative":[35],"neglect":[36],"local":[38,92],"information,":[39,94],"ability":[41],"predict":[46],"details":[47],"handle":[49],"arbitrary-resolution":[50],"images":[51,100],"is":[52,118,134],"still":[53],"weaker":[54],"than":[55],"that":[56,71,157],"CNN-based":[58],"methods.":[59],"To":[60],"address":[61],"these":[62],"shortcomings,":[63],"we":[64,80],"propose":[65,81],"MLG-Stereo,":[66],"a":[67,82,114,129],"systematic":[68],"pipeline-level":[69],"design":[70],"extends":[72],"global":[73,89,146],"modeling":[74],"beyond":[75],"encoder":[77],"stage.":[78],"First,":[79],"Multi-Granularity":[83],"Feature":[84],"Network":[85],"effectively":[87],"balance":[88],"context":[90],"geometric":[93],"enabling":[95],"comprehensive":[96],"feature":[97],"extraction":[98],"from":[99],"arbitrary":[102],"bridging":[105],"gap":[107],"between":[108],"training":[109],"inference":[111],"scales.":[112],"Then,":[113],"Local-Global":[115,130],"Cost":[116],"Volume":[117],"constructed":[119],"capture":[121],"both":[122],"locally-correlated":[123],"global-aware":[125],"information.":[127,147],"Finally,":[128],"Guided":[131],"Recurrent":[132],"Unit":[133],"introduced":[135],"iteratively":[137],"optimize":[138],"disparity":[140],"locally":[141],"under":[142],"guidance":[144],"Extensive":[148],"experiments":[149],"are":[150],"conducted":[151],"on":[152,164],"multiple":[153],"benchmark":[154],"datasets,":[155],"demonstrating":[156],"our":[158],"MLG-Stereo":[159],"exhibits":[160],"highly":[161],"competitive":[162],"performance":[163],"Middlebury":[166],"KITTI-2015":[168],"benchmarks":[169],"compared":[170],"contemporaneous":[172],"leading":[173],"methods,":[174],"achieves":[176],"outstanding":[177],"results":[178],"KITTI-2012":[181],"dataset.":[182]},"counts_by_year":[],"updated_date":"2026-04-24T06:07:52.864757","created_date":"2026-04-24T00:00:00"}
