{"id":"https://openalex.org/W4405787040","doi":"https://doi.org/10.1109/iros58592.2024.10802453","title":"Multi-modal NeRF Self-Supervision for LiDAR Semantic Segmentation","display_name":"Multi-modal NeRF Self-Supervision for LiDAR Semantic Segmentation","publication_year":2024,"publication_date":"2024-10-14","ids":{"openalex":"https://openalex.org/W4405787040","doi":"https://doi.org/10.1109/iros58592.2024.10802453"},"language":"en","primary_location":{"id":"doi:10.1109/iros58592.2024.10802453","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros58592.2024.10802453","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5046686987","display_name":"Xavier Timoneda","orcid":"https://orcid.org/0000-0001-6046-7380"},"institutions":[{"id":"https://openalex.org/I1319473763","display_name":"Volkswagen Group (Germany)","ror":"https://ror.org/01f3bhg26","country_code":"DE","type":"company","lineage":["https://openalex.org/I1319473763"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Xavier Timoneda","raw_affiliation_strings":["Onboard Fusion Team at CARIAD SE, Volkswagen Group,Ingolstadt,Germany"],"affiliations":[{"raw_affiliation_string":"Onboard Fusion Team at CARIAD SE, Volkswagen Group,Ingolstadt,Germany","institution_ids":["https://openalex.org/I1319473763"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053901906","display_name":"Markus Herb","orcid":null},"institutions":[{"id":"https://openalex.org/I1319473763","display_name":"Volkswagen Group (Germany)","ror":"https://ror.org/01f3bhg26","country_code":"DE","type":"company","lineage":["https://openalex.org/I1319473763"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Markus Herb","raw_affiliation_strings":["Onboard Fusion Team at CARIAD SE, Volkswagen Group,Ingolstadt,Germany"],"affiliations":[{"raw_affiliation_string":"Onboard Fusion Team at CARIAD SE, Volkswagen Group,Ingolstadt,Germany","institution_ids":["https://openalex.org/I1319473763"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103025098","display_name":"Fabian Duerr","orcid":"https://orcid.org/0000-0002-9578-7664"},"institutions":[{"id":"https://openalex.org/I1319473763","display_name":"Volkswagen Group (Germany)","ror":"https://ror.org/01f3bhg26","country_code":"DE","type":"company","lineage":["https://openalex.org/I1319473763"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Fabian Duerr","raw_affiliation_strings":["Onboard Fusion Team at CARIAD SE, Volkswagen Group,Ingolstadt,Germany"],"affiliations":[{"raw_affiliation_string":"Onboard Fusion Team at CARIAD SE, Volkswagen Group,Ingolstadt,Germany","institution_ids":["https://openalex.org/I1319473763"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007762628","display_name":"Daniel Goehring","orcid":"https://orcid.org/0000-0001-7819-7163"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Daniel Goehring","raw_affiliation_strings":["Dahlem Center for Machine Learning and Robotics Group at Freie Universit&#x00E4;t,Berlin,Germany"],"affiliations":[{"raw_affiliation_string":"Dahlem Center for Machine Learning and Robotics Group at Freie Universit&#x00E4;t,Berlin,Germany","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5067487326","display_name":"Fisher Yu","orcid":"https://orcid.org/0000-0001-8829-7344"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Fisher Yu","raw_affiliation_strings":["ETH Zurich,Department of Information Technology and Electrical Engineering,Switzerland"],"affiliations":[{"raw_affiliation_string":"ETH Zurich,Department of Information Technology and Electrical Engineering,Switzerland","institution_ids":["https://openalex.org/I35440088"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5046686987"],"corresponding_institution_ids":["https://openalex.org/I1319473763"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.250355,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"12939","last_page":"12946"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.9634000062942505,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T14339","display_name":"Image Processing and 3D Reconstruction","score":0.9634000062942505,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9449999928474426,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9071999788284302,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6832970976829529},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.66353440284729},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5739498734474182},{"id":"https://openalex.org/keywords/lidar","display_name":"Lidar","score":0.4942035973072052},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.41319340467453003},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.376526415348053},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3434589207172394},{"id":"https://openalex.org/keywords/remote-sensing","display_name":"Remote sensing","score":0.21899950504302979},{"id":"https://openalex.org/keywords/geology","display_name":"Geology","score":0.16398194432258606},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.15387791395187378},{"id":"https://openalex.org/keywords/composite-material","display_name":"Composite material","score":0.056278884410858154}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6832970976829529},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.66353440284729},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5739498734474182},{"id":"https://openalex.org/C51399673","wikidata":"https://www.wikidata.org/wiki/Q504027","display_name":"Lidar","level":2,"score":0.4942035973072052},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41319340467453003},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.376526415348053},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3434589207172394},{"id":"https://openalex.org/C62649853","wikidata":"https://www.wikidata.org/wiki/Q199687","display_name":"Remote sensing","level":1,"score":0.21899950504302979},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.16398194432258606},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.15387791395187378},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.056278884410858154}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros58592.2024.10802453","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros58592.2024.10802453","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5099999904632568,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W1743852472","https://openalex.org/W1903029394","https://openalex.org/W2560609797","https://openalex.org/W2588289902","https://openalex.org/W2630837129","https://openalex.org/W2795587607","https://openalex.org/W2895281799","https://openalex.org/W2964162504","https://openalex.org/W2991216808","https://openalex.org/W3003437478","https://openalex.org/W3013012408","https://openalex.org/W3035172746","https://openalex.org/W3035275207","https://openalex.org/W3035574168","https://openalex.org/W3035680157","https://openalex.org/W3107695429","https://openalex.org/W3109154950","https://openalex.org/W3109944402","https://openalex.org/W3137210930","https://openalex.org/W3159481202","https://openalex.org/W3171581326","https://openalex.org/W3173112500","https://openalex.org/W3187277596","https://openalex.org/W3198519842","https://openalex.org/W3203495808","https://openalex.org/W4214899942","https://openalex.org/W4226389400","https://openalex.org/W4312310512","https://openalex.org/W4312788649","https://openalex.org/W4312828305","https://openalex.org/W4386065958","https://openalex.org/W4390871794","https://openalex.org/W4390874575","https://openalex.org/W6647782523","https://openalex.org/W6733814495","https://openalex.org/W6774631009","https://openalex.org/W6776411772","https://openalex.org/W6778980833","https://openalex.org/W6780257433","https://openalex.org/W6800071899","https://openalex.org/W6802946413","https://openalex.org/W6853702739"],"related_works":["https://openalex.org/W4319317934","https://openalex.org/W2901265155","https://openalex.org/W2956374172","https://openalex.org/W4319837668","https://openalex.org/W4308071650","https://openalex.org/W3188333020","https://openalex.org/W4281783339","https://openalex.org/W1964041166","https://openalex.org/W4390887692","https://openalex.org/W4221065211"],"abstract_inverted_index":{"LiDAR":[0,15,53,78,116,201,233],"Semantic":[1,234],"Segmentation":[2,235],"is":[3],"a":[4,18,109,185],"fundamental":[5],"task":[6],"in":[7,230],"autonomous":[8],"driving":[9],"perception":[10,79],"consisting":[11],"of":[12,44,187,227],"associating":[13],"each":[14,33,160],"point":[16],"to":[17,76,113,183,202],"semantic":[19,157],"label.":[20],"Fully-supervised":[21],"models":[22],"have":[23],"widely":[24,60],"tackled":[25],"this":[26],"task,":[27],"but":[28],"they":[29],"require":[30],"labels":[31],"for":[32,167],"scan,":[34],"which":[35,48,65,164],"either":[36],"limits":[37],"their":[38,104],"domain":[39,81],"or":[40],"requires":[41],"impractical":[42],"amounts":[43],"expensive":[45],"annotations.Camera":[46],"images,":[47],"are":[49,66,165],"generally":[50],"recorded":[51],"alongside":[52,118],"pointclouds,":[54],"can":[55],"be":[56],"processed":[57],"by":[58,96],"the":[59,86,92,97,122,130,143,147,174,180,193,196,207,214,225],"available":[61],"2D":[62,74],"foundation":[63,177],"models,":[64],"generic":[67,189],"and":[68,139,156,217,239],"dataset-agnostic.":[69],"However,":[70],"distilling":[71],"knowledge":[72,120],"from":[73,91,121,142,200],"data":[75],"improve":[77],"raises":[80],"adaptation":[82],"challenges.":[83],"For":[84],"example,":[85],"classical":[87],"perspective":[88],"projection":[89],"suffers":[90],"parallax":[93],"effect":[94],"produced":[95],"position":[98],"shift":[99],"between":[100],"both":[101],"sensors":[102],"at":[103,159],"respective":[105],"capture":[106],"times.We":[107],"propose":[108],"Semi-Supervised":[110],"Learning":[111],"setup":[112],"leverage":[114],"unlabeled":[115,131,148,188],"pointclouds":[117],"distilled":[119],"camera":[123,144,181],"images.":[124],"To":[125],"self-supervise":[126],"our":[127,219,228],"model":[128,178,220],"on":[129],"scans,":[132],"we":[133,172,212],"add":[134],"an":[135],"auxiliary":[136],"NeRF":[137,152,215],"head":[138,153,216],"cast":[140],"rays":[141],"viewpoint":[145],"over":[146],"voxel":[149],"features.":[150],"The":[151],"predicts":[154],"densities":[155],"logits":[158],"sampled":[161],"ray":[162],"location":[163],"used":[166],"rendering":[168],"pixel":[169,198,208],"semantics.":[170],"Concurrently,":[171],"query":[173],"Segment-Anything":[175],"(SAM)":[176],"with":[179,195,221],"image":[182],"generate":[184],"set":[186],"masks.":[190],"We":[191],"fuse":[192],"masks":[194],"rendered":[197],"semantics":[199],"produce":[203],"pseudo-labels":[204],"that":[205],"supervise":[206],"predictions.":[209],"During":[210],"inference,":[211],"drop":[213],"run":[218],"only":[222],"LiDAR.We":[223],"show":[224],"effectiveness":[226],"approach":[229],"three":[231],"public":[232],"benchmarks:":[236],"nuScenes,":[237],"SemanticKITTI":[238],"ScribbleKITTI.":[240]},"counts_by_year":[],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
