{"id":"https://openalex.org/W7147274877","doi":"https://doi.org/10.48550/arxiv.2603.28053","title":"Reducing Oracle Feedback with Vision-Language Embeddings for Preference-Based RL","display_name":"Reducing Oracle Feedback with Vision-Language Embeddings for Preference-Based RL","publication_year":2026,"publication_date":"2026-03-30","ids":{"openalex":"https://openalex.org/W7147274877","doi":"https://doi.org/10.48550/arxiv.2603.28053"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.28053","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.28053","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.28053","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058809789","display_name":"Udita Ghosh","orcid":"https://orcid.org/0000-0001-9057-585X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ghosh, Udita","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034364428","display_name":"Dripta S. Raychaudhuri","orcid":"https://orcid.org/0000-0001-6519-2259"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Raychaudhuri, Dripta S.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132617171","display_name":"Jiachen Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Jiachen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006022077","display_name":"Konstantinos Karydis","orcid":"https://orcid.org/0000-0002-1144-8260"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Karydis, Konstantinos","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5103706109","display_name":"Amit K. Roy-Chowdhury","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Roy-Chowdhury, Amit","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.5496000051498413,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.5496000051498413,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.1137000024318695,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.08079999685287476,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/oracle","display_name":"Oracle","score":0.8995000123977661},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.8557999730110168},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.7224000096321106},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5187000036239624},{"id":"https://openalex.org/keywords/oracle-unified-method","display_name":"Oracle Unified Method","score":0.38690000772476196},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.383899986743927}],"concepts":[{"id":"https://openalex.org/C55166926","wikidata":"https://www.wikidata.org/wiki/Q2892946","display_name":"Oracle","level":2,"score":0.8995000123977661},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.8557999730110168},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7756999731063843},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.7224000096321106},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5187000036239624},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.41449999809265137},{"id":"https://openalex.org/C22664368","wikidata":"https://www.wikidata.org/wiki/Q4045949","display_name":"Oracle Unified Method","level":5,"score":0.38690000772476196},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.383899986743927},{"id":"https://openalex.org/C151201525","wikidata":"https://www.wikidata.org/wiki/Q177239","display_name":"Limit (mathematics)","level":2,"score":0.3801000118255615},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.33899998664855957},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3382999897003174},{"id":"https://openalex.org/C182306322","wikidata":"https://www.wikidata.org/wiki/Q1779371","display_name":"Order (exchange)","level":2,"score":0.31700000166893005},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.29510000348091125},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.2809999883174896},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2768999934196472},{"id":"https://openalex.org/C94284585","wikidata":"https://www.wikidata.org/wiki/Q228184","display_name":"Random oracle","level":4,"score":0.2750999927520752},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2648000121116638}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.28053","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.28053","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.28053","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.28053","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Preference-based":[0],"reinforcement":[1],"learning":[2],"can":[3],"learn":[4],"effective":[5],"reward":[6,40],"functions":[7],"from":[8],"comparisons,":[9],"but":[10,31],"its":[11],"scalability":[12,120],"is":[13],"constrained":[14],"by":[15,148],"the":[16,63,95,98,106,116,119,124,153,168],"high":[17,78],"cost":[18],"of":[19,118,121,126,163,170],"oracle":[20,58,73,100,146,176],"feedback.":[21,59],"Lightweight":[22],"vision-language":[23],"embedding":[24],"(VLE)":[25],"models":[26],"provide":[27],"a":[28,49,82,89,111],"cheaper":[29],"alternative,":[30],"their":[32,36,130],"noisy":[33],"outputs":[34],"limit":[35],"effectiveness":[37],"as":[38],"standalone":[39],"generators.":[41],"To":[42],"address":[43],"this":[44],"challenge,":[45],"we":[46,87],"propose":[47],"ROVED,":[48],"hybrid":[50],"framework":[51],"that":[52,93],"combines":[53],"VLE-based":[54],"supervision":[55,177],"with":[56,77,97,174],"targeted":[57],"Our":[60],"method":[61,92],"uses":[62],"VLE":[64,96,155],"to":[65,71,104,150,165],"generate":[66],"segment-level":[67],"preferences":[68],"and":[69,123],"defers":[70],"an":[72],"only":[74],"for":[75,178],"samples":[76],"uncertainty,":[79],"identified":[80],"through":[81],"filtering":[83],"mechanism.":[84],"In":[85],"addition,":[86],"introduce":[88],"parameter-efficient":[90],"fine-tuning":[91],"adapts":[94],"obtained":[99],"feedback":[101],"in":[102,110],"order":[103],"improve":[105],"model":[107],"over":[108],"time":[109],"synergistic":[112],"fashion.":[113],"This":[114],"ensures":[115],"retention":[117],"embeddings":[122,173],"accuracy":[125],"oracles,":[127],"while":[128,144],"avoiding":[129],"inefficiencies.":[131],"Across":[132],"multiple":[133],"robotic":[134],"manipulation":[135],"tasks,":[136,158],"ROVED":[137],"matches":[138],"or":[139],"surpasses":[140],"prior":[141],"preference-based":[142,179],"methods":[143],"reducing":[145],"queries":[147],"up":[149,164],"80%.":[151],"Remarkably,":[152],"adapted":[154],"generalizes":[156],"across":[157],"yielding":[159],"cumulative":[160],"annotation":[161],"savings":[162],"90%,":[166],"highlighting":[167],"practicality":[169],"combining":[171],"scalable":[172],"precise":[175],"RL.":[180]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-02T00:00:00"}
