{"id":"https://openalex.org/W4312668843","doi":"https://doi.org/10.1109/iros47612.2022.9982285","title":"Instance Segmentation with Cross-Modal Consistency","display_name":"Instance Segmentation with Cross-Modal Consistency","publication_year":2022,"publication_date":"2022-10-23","ids":{"openalex":"https://openalex.org/W4312668843","doi":"https://doi.org/10.1109/iros47612.2022.9982285"},"language":"en","primary_location":{"id":"doi:10.1109/iros47612.2022.9982285","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros47612.2022.9982285","pdf_url":null,"source":{"id":"https://openalex.org/S4363607704","display_name":"2022 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5070302005","display_name":"Alex Zihao Zhu","orcid":"https://orcid.org/0000-0002-2195-014X"},"institutions":[{"id":"https://openalex.org/I4210145145","display_name":"Nomor Research (Germany)","ror":"https://ror.org/04727qm97","country_code":"DE","type":"company","lineage":["https://openalex.org/I4210145145"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Alex Zihao Zhu","raw_affiliation_strings":["Waymo LLC"],"affiliations":[{"raw_affiliation_string":"Waymo LLC","institution_ids":["https://openalex.org/I4210145145"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052810777","display_name":"Vincent Casser","orcid":null},"institutions":[{"id":"https://openalex.org/I4210145145","display_name":"Nomor Research (Germany)","ror":"https://ror.org/04727qm97","country_code":"DE","type":"company","lineage":["https://openalex.org/I4210145145"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Vincent Casser","raw_affiliation_strings":["Waymo LLC"],"affiliations":[{"raw_affiliation_string":"Waymo LLC","institution_ids":["https://openalex.org/I4210145145"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023703380","display_name":"Reza Mahjourian","orcid":"https://orcid.org/0000-0002-4457-8395"},"institutions":[{"id":"https://openalex.org/I4210145145","display_name":"Nomor Research (Germany)","ror":"https://ror.org/04727qm97","country_code":"DE","type":"company","lineage":["https://openalex.org/I4210145145"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Reza Mahjourian","raw_affiliation_strings":["Waymo LLC"],"affiliations":[{"raw_affiliation_string":"Waymo LLC","institution_ids":["https://openalex.org/I4210145145"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012355769","display_name":"Henrik Kretzschmar","orcid":null},"institutions":[{"id":"https://openalex.org/I4210145145","display_name":"Nomor Research (Germany)","ror":"https://ror.org/04727qm97","country_code":"DE","type":"company","lineage":["https://openalex.org/I4210145145"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Henrik Kretzschmar","raw_affiliation_strings":["Waymo LLC"],"affiliations":[{"raw_affiliation_string":"Waymo LLC","institution_ids":["https://openalex.org/I4210145145"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5022418359","display_name":"S\u00f6ren Pirk","orcid":"https://orcid.org/0000-0003-1937-9797"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"S\u00f6ren Pirk","raw_affiliation_strings":["Adobe Research (Work done while at Google Research)"],"affiliations":[{"raw_affiliation_string":"Adobe Research (Work done while at Google Research)","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5070302005"],"corresponding_institution_ids":["https://openalex.org/I4210145145"],"apc_list":null,"apc_paid":null,"fwci":0.1199,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.44310566,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":"abs/1703. 10277","issue":null,"first_page":"2009","last_page":"2016"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11164","display_name":"Remote Sensing and LiDAR Applications","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/2305","display_name":"Environmental Engineering"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7812869548797607},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7658116817474365},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6647494435310364},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.6589560508728027},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.5147888660430908},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.4738876223564148},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.42415761947631836},{"id":"https://openalex.org/keywords/pixel","display_name":"Pixel","score":0.42094871401786804},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.41560861468315125},{"id":"https://openalex.org/keywords/optical-flow","display_name":"Optical flow","score":0.4103127121925354},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.13239523768424988}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7812869548797607},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7658116817474365},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6647494435310364},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6589560508728027},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.5147888660430908},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.4738876223564148},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.42415761947631836},{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.42094871401786804},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.41560861468315125},{"id":"https://openalex.org/C155542232","wikidata":"https://www.wikidata.org/wiki/Q736111","display_name":"Optical flow","level":3,"score":0.4103127121925354},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.13239523768424988},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros47612.2022.9982285","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros47612.2022.9982285","pdf_url":null,"source":{"id":"https://openalex.org/S4363607704","display_name":"2022 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.8299999833106995,"display_name":"Sustainable cities and communities","id":"https://metadata.un.org/sdg/11"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":62,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W1903029394","https://openalex.org/W1923115158","https://openalex.org/W1948751323","https://openalex.org/W2102605133","https://openalex.org/W2115579991","https://openalex.org/W2317851288","https://openalex.org/W2340897893","https://openalex.org/W2412782625","https://openalex.org/W2555618208","https://openalex.org/W2557889580","https://openalex.org/W2560023338","https://openalex.org/W2560609797","https://openalex.org/W2605161420","https://openalex.org/W2608858501","https://openalex.org/W2744404335","https://openalex.org/W2798965597","https://openalex.org/W2910628332","https://openalex.org/W2952819818","https://openalex.org/W2959771705","https://openalex.org/W2962759414","https://openalex.org/W2962850830","https://openalex.org/W2962864875","https://openalex.org/W2962891704","https://openalex.org/W2963108253","https://openalex.org/W2963150697","https://openalex.org/W2963167763","https://openalex.org/W2963281829","https://openalex.org/W2963350373","https://openalex.org/W2963727135","https://openalex.org/W2963753570","https://openalex.org/W2963775509","https://openalex.org/W2965182628","https://openalex.org/W2968296999","https://openalex.org/W2981537222","https://openalex.org/W2982161360","https://openalex.org/W2995947152","https://openalex.org/W3005680577","https://openalex.org/W3034355852","https://openalex.org/W3034430142","https://openalex.org/W3034681942","https://openalex.org/W3035172746","https://openalex.org/W3035574168","https://openalex.org/W3046136908","https://openalex.org/W3107212734","https://openalex.org/W3108086282","https://openalex.org/W3110002552","https://openalex.org/W3129785557","https://openalex.org/W3168649818","https://openalex.org/W3203597819","https://openalex.org/W3204034406","https://openalex.org/W3208394352","https://openalex.org/W4214773923","https://openalex.org/W6730410022","https://openalex.org/W6736147098","https://openalex.org/W6739778489","https://openalex.org/W6760782946","https://openalex.org/W6763422710","https://openalex.org/W6772149600","https://openalex.org/W6774314701","https://openalex.org/W6778129325","https://openalex.org/W6781542114"],"related_works":["https://openalex.org/W2185469136","https://openalex.org/W2011264131","https://openalex.org/W4306353150","https://openalex.org/W2026860389","https://openalex.org/W8219677","https://openalex.org/W3216879894","https://openalex.org/W2890132085","https://openalex.org/W2168054807","https://openalex.org/W4301143707","https://openalex.org/W2952745240"],"abstract_inverted_index":{"Segmenting":[0],"object":[1,135],"instances":[2],"is":[3],"a":[4,20,54,150],"key":[5],"task":[6],"in":[7,13,68],"machine":[8],"perception,":[9],"with":[10],"safety-critical":[11],"applications":[12],"robotics":[14],"and":[15,37,75,96,144],"autonomous":[16],"driving.":[17],"We":[18,79,101,137,147],"introduce":[19],"novel":[21],"approach":[22],"to":[23,42,53,66,87,93,130],"instance":[24,122],"segmentation":[25,56],"that":[26,50,81,90,104],"jointly":[27],"leverages":[28],"measurements":[29],"from":[30],"multiple":[31],"sensor":[32,73,99],"modalities,":[33],"such":[34,133],"as":[35,111,134],"cameras":[36],"LiDAR.":[38],"Our":[39],"method":[40,140],"learns":[41],"predict":[43],"embeddings":[44,89,106],"for":[45,161],"each":[46],"pixel":[47],"or":[48],"point":[49],"give":[51],"rise":[52],"dense":[55],"of":[57,152],"the":[58,69,76,85,105,115,142,162],"scene.":[59,116],"Specifically,":[60],"our":[61,139],"technique":[62],"applies":[63],"contrastive":[64,163],"learning":[65],"points":[67],"scene":[70],"both":[71],"across":[72,98],"modalities":[74],"temporal":[77],"domain.":[78],"demonstrate":[80,103],"this":[82],"formulation":[83],"encourages":[84],"models":[86],"learn":[88],"are":[91,107],"invariant":[92],"viewpoint":[94],"variations":[95],"consistent":[97],"modalities.":[100],"further":[102,148],"stable":[108,121],"over":[109],"time":[110],"objects":[112],"move":[113],"around":[114],"This":[117],"not":[118],"only":[119],"provides":[120],"masks,":[123],"but":[124],"can":[125],"also":[126],"provide":[127],"valuable":[128],"signals":[129],"downstream":[131],"tasks,":[132],"tracking.":[136],"evaluate":[138],"on":[141],"Cityscapes":[143],"KITTI-360":[145],"datasets.":[146],"conduct":[149],"number":[151],"ablation":[153],"studies,":[154],"demonstrating":[155],"benefits":[156],"when":[157],"applying":[158],"additional":[159],"inputs":[160],"loss.":[164]},"counts_by_year":[{"year":2023,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
