{"id":"https://openalex.org/W4416748462","doi":"https://doi.org/10.1109/iros60139.2025.11246559","title":"Vision-Driven 2D Supervised Fine-Tuning Framework for Bird\u2019s Eye View Perception","display_name":"Vision-Driven 2D Supervised Fine-Tuning Framework for Bird\u2019s Eye View Perception","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4416748462","doi":"https://doi.org/10.1109/iros60139.2025.11246559"},"language":null,"primary_location":{"id":"doi:10.1109/iros60139.2025.11246559","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11246559","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100779350","display_name":"Lei He","orcid":"https://orcid.org/0000-0002-5024-0155"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Lei He","raw_affiliation_strings":["Tsinghua University,School of Vehicle and Mobility,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tsinghua University,School of Vehicle and Mobility,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072223131","display_name":"Qiaoyi Wang","orcid":"https://orcid.org/0009-0001-6615-012X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qiaoyi Wang","raw_affiliation_strings":["Tsinghua University,School of Vehicle and Mobility,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tsinghua University,School of Vehicle and Mobility,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103268472","display_name":"Honglin Sun","orcid":"https://orcid.org/0009-0009-5554-3903"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Honglin Sun","raw_affiliation_strings":["Tsinghua University,School of Vehicle and Mobility,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tsinghua University,School of Vehicle and Mobility,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082349291","display_name":"Qing Xu","orcid":"https://orcid.org/0000-0001-9724-2978"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qing Xu","raw_affiliation_strings":["Tsinghua University,School of Vehicle and Mobility,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tsinghua University,School of Vehicle and Mobility,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044921383","display_name":"Bolin Gao","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bolin Gao","raw_affiliation_strings":["Tsinghua University,School of Vehicle and Mobility,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tsinghua University,School of Vehicle and Mobility,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100747108","display_name":"Shengbo Eben Li","orcid":"https://orcid.org/0000-0003-4923-3633"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shengbo Eben Li","raw_affiliation_strings":["Tsinghua University,School of Vehicle and Mobility,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tsinghua University,School of Vehicle and Mobility,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010490013","display_name":"Jianqiang Wang","orcid":"https://orcid.org/0000-0003-3235-2708"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianqiang Wang","raw_affiliation_strings":["Tsinghua University,School of Vehicle and Mobility,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tsinghua University,School of Vehicle and Mobility,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5031855986","display_name":"Keqiang Li","orcid":"https://orcid.org/0000-0002-9333-7416"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Keqiang Li","raw_affiliation_strings":["Tsinghua University,School of Vehicle and Mobility,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tsinghua University,School of Vehicle and Mobility,Beijing,China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5100779350"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.34972127,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"13728","last_page":"13735"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.6392999887466431,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.6392999887466431,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.11919999867677689,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.04729999974370003,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/monocular","display_name":"Monocular","score":0.6915000081062317},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.6459000110626221},{"id":"https://openalex.org/keywords/ground-truth","display_name":"Ground truth","score":0.6276000142097473},{"id":"https://openalex.org/keywords/lidar","display_name":"Lidar","score":0.5364999771118164},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5343000292778015},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.48240000009536743},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.43810001015663147},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.41359999775886536},{"id":"https://openalex.org/keywords/visual-perception","display_name":"Visual perception","score":0.4077000021934509}],"concepts":[{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7224000096321106},{"id":"https://openalex.org/C65909025","wikidata":"https://www.wikidata.org/wiki/Q1945033","display_name":"Monocular","level":2,"score":0.6915000081062317},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6596999764442444},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.6459000110626221},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.6276000142097473},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5896000266075134},{"id":"https://openalex.org/C51399673","wikidata":"https://www.wikidata.org/wiki/Q504027","display_name":"Lidar","level":2,"score":0.5364999771118164},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5343000292778015},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.48240000009536743},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.43810001015663147},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.41359999775886536},{"id":"https://openalex.org/C178253425","wikidata":"https://www.wikidata.org/wiki/Q162668","display_name":"Visual perception","level":3,"score":0.4077000021934509},{"id":"https://openalex.org/C5339829","wikidata":"https://www.wikidata.org/wiki/Q1425977","display_name":"Machine vision","level":2,"score":0.32109999656677246},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3138999938964844},{"id":"https://openalex.org/C158829959","wikidata":"https://www.wikidata.org/wiki/Q1640606","display_name":"Monocular vision","level":2,"score":0.29330000281333923},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.28790000081062317},{"id":"https://openalex.org/C28063669","wikidata":"https://www.wikidata.org/wiki/Q7167042","display_name":"Perceptual system","level":3,"score":0.2867000102996826},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.28049999475479126},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.28040000796318054},{"id":"https://openalex.org/C52672216","wikidata":"https://www.wikidata.org/wiki/Q1749840","display_name":"Depth perception","level":3,"score":0.2745000123977661},{"id":"https://openalex.org/C124504099","wikidata":"https://www.wikidata.org/wiki/Q56933","display_name":"Image segmentation","level":3,"score":0.274399995803833},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.26809999346733093},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.2667999863624573},{"id":"https://openalex.org/C2775955345","wikidata":"https://www.wikidata.org/wiki/Q7449071","display_name":"Semantic mapping","level":2,"score":0.26260000467300415},{"id":"https://openalex.org/C56461940","wikidata":"https://www.wikidata.org/wiki/Q970687","display_name":"Eye tracking","level":2,"score":0.25690001249313354}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros60139.2025.11246559","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11246559","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W2903165770","https://openalex.org/W2962766617","https://openalex.org/W2963351448","https://openalex.org/W3035172746","https://openalex.org/W3035574168","https://openalex.org/W3096609285","https://openalex.org/W3108381027","https://openalex.org/W3109395584","https://openalex.org/W3129840128","https://openalex.org/W3167095230","https://openalex.org/W3176888779","https://openalex.org/W3207105331","https://openalex.org/W3215100485","https://openalex.org/W4214520160","https://openalex.org/W4214558638","https://openalex.org/W4225793049","https://openalex.org/W4382464460","https://openalex.org/W4385245566","https://openalex.org/W4386071597","https://openalex.org/W4390872451","https://openalex.org/W4399485256","https://openalex.org/W4399768898","https://openalex.org/W4400032898","https://openalex.org/W4401109681","https://openalex.org/W4401839711","https://openalex.org/W4409426841"],"related_works":[],"abstract_inverted_index":{"Visual":[0],"bird\u2019s":[1],"eye":[2],"view":[3],"(BEV)":[4],"perception,":[5,90],"dute":[6],"to":[7,37],"its":[8,176],"excellent":[9],"perceptual":[10],"capabilities,":[11],"is":[12,45],"progressively":[13],"replacing":[14],"costly":[15],"LiDAR-based":[16],"perception":[17,31,83,107],"systems,":[18],"especially":[19],"in":[20,98],"the":[21,65,94,103,124,144,150],"realm":[22],"of":[23,30,105,152],"urban":[24],"intelligent":[25],"driving.":[26],"However,":[27],"this":[28,74],"type":[29],"still":[32],"relies":[33],"on":[34,86,126,143,165,172],"LiDAR":[35,66],"data":[36,67],"construct":[38],"ground":[39,129],"truth":[40],"databases,":[41],"a":[42,78],"process":[43],"that":[44],"both":[46],"cumbersome":[47],"and":[48,63,117,131,140,146,159,163,167,170,179],"time-consuming.":[49],"Additionally,":[50],"most":[51],"mass-produced":[52],"autonomous":[53],"driving":[54],"systems":[55],"are":[56],"equipped":[57],"solely":[58],"with":[59],"surround":[60],"camera":[61],"sensors":[62],"lack":[64],"necessary":[68],"for":[69,81,135],"precise":[70],"annotation.":[71],"To":[72],"tackle":[73],"challenge,":[75],"we":[76],"propose":[77],"fine-tuning":[79],"method":[80,110],"BEV":[82,128],"network":[84],"based":[85],"visual":[87],"2D":[88,106,113],"semantic":[89,114],"aimed":[91],"at":[92],"enhancing":[93],"model\u2019s":[95],"generalization":[96],"capabilities":[97],"new":[99],"scene":[100],"data.":[101],"Leveraging":[102],"maturity":[104],"technologies,":[108],"our":[109,153],"utilizes":[111],"only":[112],"segmentation":[115],"labels":[116],"monocular":[118],"depth":[119],"estimations,":[120],"thereby":[121],"significantly":[122],"reducing":[123],"dependence":[125],"expensive":[127],"truths":[130],"offering":[132],"strong":[133],"potential":[134],"industrial":[136],"deployment.":[137],"Extensive":[138],"experiments":[139],"comparative":[141],"analyses":[142],"nuScenes":[145],"Waymo":[147],"datasets":[148],"demonstrate":[149],"effectiveness":[151],"method.":[154],"Specifically,":[155],"it":[156],"improves":[157],"mAP":[158],"NDS":[160],"by":[161,168],"2.51%":[162],"1.93%":[164],"nuScenes,":[166],"1.21%":[169],"0.78%":[171],"Waymo,":[173],"respectively,":[174],"validating":[175],"practical":[177],"utility":[178],"robustness":[180],"across":[181],"diverse":[182],"domains.":[183]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2025-11-28T00:00:00"}
