{"id":"https://openalex.org/W4281255813","doi":"https://doi.org/10.48550/arxiv.2205.09743","title":"BEVerse: Unified Perception and Prediction in Birds-Eye-View for Vision-Centric Autonomous Driving","display_name":"BEVerse: Unified Perception and Prediction in Birds-Eye-View for Vision-Centric Autonomous Driving","publication_year":2022,"publication_date":"2022-05-19","ids":{"openalex":"https://openalex.org/W4281255813","doi":"https://doi.org/10.48550/arxiv.2205.09743"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2205.09743","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2205.09743","pdf_url":"https://arxiv.org/pdf/2205.09743","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2205.09743","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100457286","display_name":"Yunpeng Zhang","orcid":"https://orcid.org/0009-0002-2722-7128"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhang, Yunpeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101705530","display_name":"Zheng Zhu","orcid":"https://orcid.org/0000-0002-4435-1692"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Zheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006396086","display_name":"Wenzhao Zheng","orcid":"https://orcid.org/0000-0001-7188-3734"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheng, Wenzhao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081987954","display_name":"Junjie Huang","orcid":"https://orcid.org/0000-0003-2382-4443"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Junjie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051049837","display_name":"Guan Huang","orcid":"https://orcid.org/0000-0002-9421-7344"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Guan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100620306","display_name":"Jie Zhou","orcid":"https://orcid.org/0000-0001-7701-234X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Jie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100460385","display_name":"Jiwen Lu","orcid":"https://orcid.org/0000-0002-6121-5529"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Jiwen","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5100457286"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":83,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11605","display_name":"Visual Attention and Saliency Detection","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10331","display_name":"Video Surveillance and Tracking Methods","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8352793455123901},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6942091584205627},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6923901438713074},{"id":"https://openalex.org/keywords/timestamp","display_name":"Timestamp","score":0.6268052458763123},{"id":"https://openalex.org/keywords/motion","display_name":"Motion (physics)","score":0.5467641353607178},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5127335786819458},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5092576742172241},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5087905526161194},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.500436544418335},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4978148937225342},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.490845263004303},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.4332830309867859},{"id":"https://openalex.org/keywords/grid","display_name":"Grid","score":0.431205689907074},{"id":"https://openalex.org/keywords/real-time-computing","display_name":"Real-time computing","score":0.09227034449577332}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8352793455123901},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6942091584205627},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6923901438713074},{"id":"https://openalex.org/C113954288","wikidata":"https://www.wikidata.org/wiki/Q186885","display_name":"Timestamp","level":2,"score":0.6268052458763123},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.5467641353607178},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5127335786819458},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5092576742172241},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5087905526161194},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.500436544418335},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4978148937225342},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.490845263004303},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.4332830309867859},{"id":"https://openalex.org/C187691185","wikidata":"https://www.wikidata.org/wiki/Q2020720","display_name":"Grid","level":2,"score":0.431205689907074},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.09227034449577332},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2205.09743","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2205.09743","pdf_url":"https://arxiv.org/pdf/2205.09743","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2205.09743","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2205.09743","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2205.09743","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2205.09743","pdf_url":"https://arxiv.org/pdf/2205.09743","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2060561905","https://openalex.org/W1417711376","https://openalex.org/W1986883493","https://openalex.org/W2469862403","https://openalex.org/W2166378262","https://openalex.org/W2035891203","https://openalex.org/W4379524643","https://openalex.org/W2367807705","https://openalex.org/W4285233590","https://openalex.org/W2054792846"],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3,97,115,156],"present":[4],"BEVerse,":[5],"a":[6],"unified":[7],"framework":[8],"for":[9,45,77,89,111,122],"3D":[10,133,167],"perception":[11],"and":[12,39,56,65,92,109,136,173,190],"prediction":[13],"based":[14],"on":[15,22,152,166],"multi-camera":[16,37],"systems.":[17],"Unlike":[18],"existing":[19,163],"studies":[20],"focusing":[21],"the":[23,69,72,95,99,117,129,141,153,159,178],"improvement":[24],"of":[25,119],"single-task":[26,164],"approaches,":[27],"BEVerse":[28,50,161,181],"features":[29,105],"in":[30,81,184],"producing":[31],"spatio-temporal":[32,73],"Birds-Eye-View":[33],"(BEV)":[34],"representations":[35,62],"from":[36,63],"videos":[38],"jointly":[40],"reasoning":[41,91],"about":[42],"multiple":[43,84],"tasks":[44],"vision-centric":[46],"autonomous":[47],"driving.":[48],"Specifically,":[49],"first":[51],"performs":[52],"shared":[53],"feature":[54,79],"extraction":[55,80],"lifting":[57],"to":[58,102],"generate":[59,103],"4D":[60],"BEV":[61,104],"multi-timestamp":[64],"multi-view":[66],"images.":[67],"After":[68],"ego-motion":[70],"alignment,":[71],"encoder":[74],"is":[75],"utilized":[76],"further":[78],"BEV.":[82],"Finally,":[83],"task":[85],"decoders":[86],"are":[87],"attached":[88],"joint":[90],"prediction.":[93,125,148,175],"Within":[94],"decoders,":[96],"propose":[98],"grid":[100],"sampler":[101],"with":[106,177],"different":[107,112],"ranges":[108],"granularities":[110],"tasks.":[113],"Also,":[114],"design":[116],"method":[118],"iterative":[120],"flow":[121],"memory-efficient":[123],"future":[124],"We":[126],"show":[127,157],"that":[128,158],"temporal":[130],"information":[131],"improves":[132],"object":[134,168],"detection":[135],"semantic":[137,170],"map":[138,171],"construction,":[139,172],"while":[140],"multi-task":[142,160],"learning":[143],"can":[144],"implicitly":[145],"benefit":[146],"motion":[147,174],"With":[149],"extensive":[150],"experiments":[151],"nuScenes":[154],"dataset,":[155],"outperforms":[162],"methods":[165],"detection,":[169],"Compared":[176],"sequential":[179],"paradigm,":[180],"also":[182],"favors":[183],"significantly":[185],"improved":[186],"efficiency.":[187],"The":[188],"code":[189],"trained":[191],"models":[192],"will":[193],"be":[194],"released":[195],"at":[196],"https://github.com/zhangyp15/BEVerse.":[197]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":48},{"year":2023,"cited_by_count":30}],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2022-05-23T00:00:00"}
