{"id":"https://openalex.org/W4401414432","doi":"https://doi.org/10.1109/icra57147.2024.10611575","title":"Robotic Offline RL from Internet Videos via Value-Function Learning","display_name":"Robotic Offline RL from Internet Videos via Value-Function Learning","publication_year":2024,"publication_date":"2024-05-13","ids":{"openalex":"https://openalex.org/W4401414432","doi":"https://doi.org/10.1109/icra57147.2024.10611575"},"language":"en","primary_location":{"id":"doi:10.1109/icra57147.2024.10611575","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra57147.2024.10611575","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056159807","display_name":"Chethan Bhateja","orcid":null},"institutions":[{"id":"https://openalex.org/I134446601","display_name":"Berkeley College","ror":"https://ror.org/02xewxa75","country_code":"US","type":"education","lineage":["https://openalex.org/I134446601"]},{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Chethan Bhateja","raw_affiliation_strings":["UC Berkeley"],"affiliations":[{"raw_affiliation_string":"UC Berkeley","institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113325532","display_name":"Derek Guo","orcid":null},"institutions":[{"id":"https://openalex.org/I134446601","display_name":"Berkeley College","ror":"https://ror.org/02xewxa75","country_code":"US","type":"education","lineage":["https://openalex.org/I134446601"]},{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Derek Guo","raw_affiliation_strings":["UC Berkeley"],"affiliations":[{"raw_affiliation_string":"UC Berkeley","institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052979358","display_name":"Dibya Ghosh","orcid":null},"institutions":[{"id":"https://openalex.org/I134446601","display_name":"Berkeley College","ror":"https://ror.org/02xewxa75","country_code":"US","type":"education","lineage":["https://openalex.org/I134446601"]},{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dibya Ghosh","raw_affiliation_strings":["UC Berkeley"],"affiliations":[{"raw_affiliation_string":"UC Berkeley","institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088818925","display_name":"Anikait Singh","orcid":null},"institutions":[{"id":"https://openalex.org/I134446601","display_name":"Berkeley College","ror":"https://ror.org/02xewxa75","country_code":"US","type":"education","lineage":["https://openalex.org/I134446601"]},{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anikait Singh","raw_affiliation_strings":["UC Berkeley"],"affiliations":[{"raw_affiliation_string":"UC Berkeley","institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080311165","display_name":"Manan Tomar","orcid":null},"institutions":[{"id":"https://openalex.org/I134446601","display_name":"Berkeley College","ror":"https://ror.org/02xewxa75","country_code":"US","type":"education","lineage":["https://openalex.org/I134446601"]},{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Manan Tomar","raw_affiliation_strings":["UC Berkeley"],"affiliations":[{"raw_affiliation_string":"UC Berkeley","institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101445461","display_name":"Quan Vuong","orcid":"https://orcid.org/0009-0009-1829-9614"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Quan Vuong","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001178359","display_name":"Yevgen Chebotar","orcid":null},"institutions":[{"id":"https://openalex.org/I4210090411","display_name":"DeepMind (United Kingdom)","ror":"https://ror.org/00971b260","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210090411","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Yevgen Chebotar","raw_affiliation_strings":["Google DeepMind"],"affiliations":[{"raw_affiliation_string":"Google DeepMind","institution_ids":["https://openalex.org/I1291425158","https://openalex.org/I4210090411"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026322200","display_name":"Sergey Levine","orcid":"https://orcid.org/0000-0001-6764-2743"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]},{"id":"https://openalex.org/I134446601","display_name":"Berkeley College","ror":"https://ror.org/02xewxa75","country_code":"US","type":"education","lineage":["https://openalex.org/I134446601"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sergey Levine","raw_affiliation_strings":["UC Berkeley"],"affiliations":[{"raw_affiliation_string":"UC Berkeley","institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102493293","display_name":"Aviral Kumar","orcid":null},"institutions":[{"id":"https://openalex.org/I134446601","display_name":"Berkeley College","ror":"https://ror.org/02xewxa75","country_code":"US","type":"education","lineage":["https://openalex.org/I134446601"]},{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aviral Kumar","raw_affiliation_strings":["UC Berkeley"],"affiliations":[{"raw_affiliation_string":"UC Berkeley","institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5056159807"],"corresponding_institution_ids":["https://openalex.org/I134446601","https://openalex.org/I95457486"],"apc_list":null,"apc_paid":null,"fwci":1.7958,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.87156541,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"16977","last_page":"16984"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9914000034332275,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9904999732971191,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6747329235076904},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.5432294011116028},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.4946434199810028},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4182719588279724},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.32131755352020264},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.20938801765441895}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6747329235076904},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.5432294011116028},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.4946434199810028},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4182719588279724},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.32131755352020264},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.20938801765441895},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra57147.2024.10611575","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra57147.2024.10611575","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":56,"referenced_works":["https://openalex.org/W1646707810","https://openalex.org/W2102558581","https://openalex.org/W2121703796","https://openalex.org/W2302255633","https://openalex.org/W2591957724","https://openalex.org/W2769112066","https://openalex.org/W2884247313","https://openalex.org/W2913350542","https://openalex.org/W2962787969","https://openalex.org/W2962858109","https://openalex.org/W3033324992","https://openalex.org/W3204665606","https://openalex.org/W3205786327","https://openalex.org/W4221159977","https://openalex.org/W4221160484","https://openalex.org/W4225000296","https://openalex.org/W4226167593","https://openalex.org/W4250482878","https://openalex.org/W4252279978","https://openalex.org/W4283460722","https://openalex.org/W4283753019","https://openalex.org/W4287811291","https://openalex.org/W4300799055","https://openalex.org/W4302010007","https://openalex.org/W4303648978","https://openalex.org/W4313156423","https://openalex.org/W4365440904","https://openalex.org/W4383300946","https://openalex.org/W4385430825","https://openalex.org/W4385431288","https://openalex.org/W4386065350","https://openalex.org/W4386185624","https://openalex.org/W6677984395","https://openalex.org/W6734502593","https://openalex.org/W6740801417","https://openalex.org/W6753770476","https://openalex.org/W6758856176","https://openalex.org/W6766263406","https://openalex.org/W6769166761","https://openalex.org/W6776901495","https://openalex.org/W6779265984","https://openalex.org/W6784862516","https://openalex.org/W6801009634","https://openalex.org/W6803870738","https://openalex.org/W6809850638","https://openalex.org/W6810080435","https://openalex.org/W6810635395","https://openalex.org/W6810655313","https://openalex.org/W6839256673","https://openalex.org/W6839639268","https://openalex.org/W6840133048","https://openalex.org/W6845226490","https://openalex.org/W6845793730","https://openalex.org/W6852147282","https://openalex.org/W6854358842","https://openalex.org/W6855642159"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W2382290278","https://openalex.org/W4395014643"],"abstract_inverted_index":{"Pre-training":[0],"on":[1,110,122,153,163,188],"Internet":[2],"data":[3,48,63,155],"has":[4],"proven":[5],"to":[6,23,45,131],"be":[7,214],"a":[8,58,96,189],"key":[9],"ingredient":[10],"for":[11,74,88,98,139,173],"broad":[12],"generalization":[13],"in":[14,27,104,168,194],"many":[15],"modern":[16],"ML":[17],"systems.":[18],"What":[19],"would":[20],"it":[21],"take":[22],"enable":[24],"such":[25],"capabilities":[26],"robotic":[28,51,105,133,157],"reinforcement":[29],"learning":[30,52,111,121,140],"(RL)?":[31],"Offline":[32],"RL":[33,89,135,159],"methods,":[34],"which":[35,67],"learn":[36],"from":[37,141],"datasets":[38,72,103,124],"of":[39,151],"robot":[40,165,192],"experience,":[41],"offer":[42],"one":[43],"way":[44],"leverage":[46],"prior":[47,71,206],"into":[49],"the":[50,69,82,149],"pipeline.":[53],"However,":[54],"these":[55],"methods":[56],"have":[57],"\"type":[59],"mismatch\"":[60],"with":[61,156],"video":[62,77,102,123,142,154,209],"(such":[64],"as":[65],"Ego4D),":[66],"are":[68,128],"largest":[70],"available":[73],"robotics,":[75],"since":[76],"offers":[78],"observation-only":[79],"experience":[80],"without":[81],"action":[83],"or":[84],"reward":[85],"annotations":[86],"needed":[87],"methods.":[90,207],"In":[91],"this":[92],"paper,":[93],"we":[94],"develop":[95],"system":[97],"leveraging":[99],"large-scale":[100],"human":[101],"offline":[106,134,158],"RL,":[107],"based":[108],"entirely":[109],"value":[112,120,169],"functions":[113,170],"via":[114],"temporal-difference":[115],"learning.":[116],"We":[117],"show":[118],"that":[119,127,161,176,201],"learns":[125],"representations":[126],"more":[129],"conducive":[130],"downstream":[132],"than":[136],"other":[137,205],"approaches":[138,160],"data.":[143],"Our":[144,208],"system,":[145],"called":[146],"V-PTR,":[147],"combines":[148],"benefits":[150],"pre-training":[152],"train":[162],"diverse":[164],"data,":[166],"resulting":[167],"and":[171,181,193,210],"policies":[172,200],"manipulation":[174,186],"tasks":[175,187],"perform":[177],"better,":[178],"act":[179],"robustly,":[180],"generalize":[182],"broadly.":[183],"On":[184],"several":[185],"real":[190],"WidowX":[191],"simulated":[195],"settings,":[196],"our":[197],"framework":[198],"produces":[199],"greatly":[202],"improve":[203],"over":[204],"additional":[211],"details":[212],"can":[213],"found":[215],"at":[216],"https://dibyaghosh.com/vptr/.":[217]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-15T09:29:46.208133","created_date":"2025-10-10T00:00:00"}
