{"id":"https://openalex.org/W7162516743","doi":"https://doi.org/10.48550/arxiv.2605.26725","title":"Joint 2D-3D Segmentation and Association in Street-level Imaging","display_name":"Joint 2D-3D Segmentation and Association in Street-level Imaging","publication_year":2026,"publication_date":"2026-05-26","ids":{"openalex":"https://openalex.org/W7162516743","doi":"https://doi.org/10.48550/arxiv.2605.26725"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.26725","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.26725","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.26725","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5137109547","display_name":"Amir Melnikov","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Melnikov, Amir","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5137134801","display_name":"Masayuki Tanaka","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tanaka, Masayuki","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039862436","display_name":"Yusuke Monno","orcid":"https://orcid.org/0000-0001-6733-3406"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Monno, Yusuke","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5024453747","display_name":"Masatoshi Okutomi","orcid":"https://orcid.org/0000-0001-5787-0742"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Okutomi, Masatoshi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.5253000259399414,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.5253000259399414,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.15459999442100525,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13282","display_name":"Automated Road and Building Extraction","score":0.05249999836087227,"subfield":{"id":"https://openalex.org/subfields/2212","display_name":"Ocean Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.7124999761581421},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.5090000033378601},{"id":"https://openalex.org/keywords/image-segmentation","display_name":"Image segmentation","score":0.47920000553131104},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.4575999975204468},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.45509999990463257},{"id":"https://openalex.org/keywords/association","display_name":"Association (psychology)","score":0.43720000982284546},{"id":"https://openalex.org/keywords/joint","display_name":"Joint (building)","score":0.4357999861240387},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.4207000136375427},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.4009000062942505}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7616999745368958},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.7124999761581421},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6941999793052673},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6625000238418579},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.5090000033378601},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.47920000553131104},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.4575999975204468},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.45509999990463257},{"id":"https://openalex.org/C142853389","wikidata":"https://www.wikidata.org/wiki/Q744778","display_name":"Association (psychology)","level":2,"score":0.43720000982284546},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.4357999861240387},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.4207000136375427},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.4009000062942505},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.39100000262260437},{"id":"https://openalex.org/C2776035091","wikidata":"https://www.wikidata.org/wiki/Q7928819","display_name":"Viewpoints","level":2,"score":0.37959998846054077},{"id":"https://openalex.org/C65885262","wikidata":"https://www.wikidata.org/wiki/Q7429708","display_name":"Scale-space segmentation","level":4,"score":0.3714999854564667},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.3334999978542328},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.33180001378059387},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.3303999900817871},{"id":"https://openalex.org/C2778355321","wikidata":"https://www.wikidata.org/wiki/Q17079427","display_name":"Identity (music)","level":2,"score":0.30799999833106995},{"id":"https://openalex.org/C25694479","wikidata":"https://www.wikidata.org/wiki/Q7446278","display_name":"Segmentation-based object categorization","level":5,"score":0.30550000071525574},{"id":"https://openalex.org/C9417928","wikidata":"https://www.wikidata.org/wiki/Q1070689","display_name":"Image processing","level":3,"score":0.29739999771118164},{"id":"https://openalex.org/C2983325608","wikidata":"https://www.wikidata.org/wiki/Q17084606","display_name":"Data association","level":3,"score":0.29100000858306885},{"id":"https://openalex.org/C159620131","wikidata":"https://www.wikidata.org/wiki/Q1938983","display_name":"Spatial analysis","level":2,"score":0.2849000096321106},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.2766000032424927},{"id":"https://openalex.org/C64876066","wikidata":"https://www.wikidata.org/wiki/Q5141226","display_name":"Cognitive neuroscience of visual object recognition","level":3,"score":0.2745000123977661},{"id":"https://openalex.org/C63099799","wikidata":"https://www.wikidata.org/wiki/Q17147001","display_name":"Image texture","level":4,"score":0.2612000107765198},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.25999999046325684},{"id":"https://openalex.org/C16345878","wikidata":"https://www.wikidata.org/wiki/Q107472979","display_name":"Orientation (vector space)","level":2,"score":0.2540000081062317}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.26725","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.26725","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.26725","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.26725","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/11","display_name":"Sustainable cities and communities","score":0.8441807627677917}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Accurate":[0],"interpretation":[1],"of":[2,14,116,124],"street-level":[3,107],"imagery":[4],"is":[5,103],"essential":[6],"for":[7,26,49,105,113],"large-scale":[8],"urban":[9,145],"mapping":[10],"and":[11,30,57,87,109,127],"the":[12,100],"creation":[13],"Spatial":[15],"Digital":[16],"Twin":[17],"(SDT)":[18],"environments.":[19],"This":[20],"work":[21],"presents":[22],"a":[23,114,139],"unified":[24],"framework":[25],"joint":[27],"2D-3D":[28],"segmentation":[29,58],"association":[31,70],"that":[32,43],"integrates":[33],"visual":[34],"semantics":[35],"with":[36,60,96],"multi-view":[37],"geometric":[38,78],"reasoning.":[39],"Unlike":[40],"conventional":[41],"approaches":[42],"rely":[44],"heavily":[45],"on":[46],"sequential":[47],"frames":[48],"temporal":[50],"tracking,":[51,76],"our":[52],"method":[53],"leverages":[54],"zero-shot":[55],"detection":[56],"together":[59],"structure-from-motion":[61],"reconstruction":[62],"to":[63,80,133],"establish":[64],"stable":[65],"cross-view":[66],"correspondences.":[67],"A":[68],"3D-driven":[69],"mechanism":[71],"replaces":[72],"traditional":[73],"2D":[74,93],"multi-object":[75],"using":[77],"consistency":[79],"guide":[81],"identity":[82,130],"preservation":[83],"across":[84],"wide-baseline":[85],"viewpoints":[86],"varying":[88],"imaging":[89],"conditions.":[90],"By":[91],"combining":[92],"texture":[94],"cues":[95],"global":[97],"3D":[98],"context,":[99],"proposed":[101],"pipeline":[102],"well-suited":[104],"scalable":[106],"processing":[108],"can":[110],"be":[111],"used":[112],"variety":[115],"object":[117],"types.":[118],"Experiments":[119],"demonstrate":[120],"substantially":[121],"improved":[122],"coverage":[123],"ground-truth":[125],"sequences":[126],"more":[128],"robust":[129],"retention":[131],"compared":[132],"state-of-the-art":[134],"2D-only":[135],"tracking":[136],"methods,":[137],"achieving":[138],"22%":[140],"performance":[141],"gain":[142],"in":[143],"challenging":[144],"scenarios.":[146]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-28T00:00:00"}
