{"id":"https://openalex.org/W2972509658","doi":"https://doi.org/10.1109/icra40945.2020.9197456","title":"Predicting optimal value functions by interpolating reward functions in scalarized multi-objective reinforcement learning","display_name":"Predicting optimal value functions by interpolating reward functions in scalarized multi-objective reinforcement learning","publication_year":2020,"publication_date":"2020-05-01","ids":{"openalex":"https://openalex.org/W2972509658","doi":"https://doi.org/10.1109/icra40945.2020.9197456","mag":"2972509658"},"language":"en","primary_location":{"id":"doi:10.1109/icra40945.2020.9197456","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra40945.2020.9197456","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1909.05004","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032965719","display_name":"Arpan Kusari","orcid":"https://orcid.org/0000-0002-6322-9043"},"institutions":[{"id":"https://openalex.org/I1292974536","display_name":"Ford Motor Company (United States)","ror":"https://ror.org/00g2tkw06","country_code":"US","type":"company","lineage":["https://openalex.org/I1292974536"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Arpan Kusari","raw_affiliation_strings":["Research and Advanced Engineering, Ford Motor Company, Dearborn, MI, USA","Ford Motor Company"],"affiliations":[{"raw_affiliation_string":"Research and Advanced Engineering, Ford Motor Company, Dearborn, MI, USA","institution_ids":["https://openalex.org/I1292974536"]},{"raw_affiliation_string":"Ford Motor Company","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5011665886","display_name":"Jonathan P. How","orcid":"https://orcid.org/0000-0001-8576-1930"},"institutions":[{"id":"https://openalex.org/I46020346","display_name":"American Institute of Aeronautics and Astronautics","ror":"https://ror.org/00a1rzv11","country_code":"US","type":"other","lineage":["https://openalex.org/I46020346"]},{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jonathan P. How","raw_affiliation_strings":["Department of Aeronautics and Astronautics, MIT, Cambridge, MA, USA","Massachusetts Institute Of Technology#TAB#"],"affiliations":[{"raw_affiliation_string":"Department of Aeronautics and Astronautics, MIT, Cambridge, MA, USA","institution_ids":["https://openalex.org/I46020346"]},{"raw_affiliation_string":"Massachusetts Institute Of Technology#TAB#","institution_ids":["https://openalex.org/I63966007"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5032965719"],"corresponding_institution_ids":["https://openalex.org/I1292974536"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.00703177,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"7484","last_page":"7490"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10848","display_name":"Advanced Multi-Objective Optimization Algorithms","score":0.9933000206947327,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12814","display_name":"Gaussian Processes and Bayesian Inference","score":0.9656999707221985,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7051209807395935},{"id":"https://openalex.org/keywords/interpolation","display_name":"Interpolation (computer graphics)","score":0.6504999995231628},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.6391993165016174},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5726487636566162},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.5288285613059998},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5086644291877747},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5032860636711121},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5031320452690125},{"id":"https://openalex.org/keywords/inverted-pendulum","display_name":"Inverted pendulum","score":0.49311205744743347},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.4799806773662567},{"id":"https://openalex.org/keywords/value","display_name":"Value (mathematics)","score":0.4531819820404053},{"id":"https://openalex.org/keywords/pendulum","display_name":"Pendulum","score":0.44475269317626953},{"id":"https://openalex.org/keywords/preference","display_name":"Preference","score":0.41945451498031616},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.4176775813102722},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.3990873396396637},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.30923980474472046},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.2035215198993683},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.10123848915100098},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07383441925048828},{"id":"https://openalex.org/keywords/nonlinear-system","display_name":"Nonlinear system","score":0.06876453757286072}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7051209807395935},{"id":"https://openalex.org/C137800194","wikidata":"https://www.wikidata.org/wiki/Q11713455","display_name":"Interpolation (computer graphics)","level":3,"score":0.6504999995231628},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.6391993165016174},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5726487636566162},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.5288285613059998},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5086644291877747},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5032860636711121},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5031320452690125},{"id":"https://openalex.org/C192921069","wikidata":"https://www.wikidata.org/wiki/Q550134","display_name":"Inverted pendulum","level":3,"score":0.49311205744743347},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.4799806773662567},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.4531819820404053},{"id":"https://openalex.org/C110639684","wikidata":"https://www.wikidata.org/wiki/Q20702","display_name":"Pendulum","level":2,"score":0.44475269317626953},{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.41945451498031616},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.4176775813102722},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3990873396396637},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.30923980474472046},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2035215198993683},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.10123848915100098},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07383441925048828},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.06876453757286072},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0}],"mesh":[],"locations_count":6,"locations":[{"id":"doi:10.1109/icra40945.2020.9197456","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra40945.2020.9197456","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1909.05004","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1909.05004","pdf_url":"https://arxiv.org/pdf/1909.05004","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:2972509658","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1909.05004.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"pmh:oai:dspace.mit.edu:1721.1/136715","is_oa":true,"landing_page_url":"https://hdl.handle.net/1721.1/136715","pdf_url":null,"source":{"id":"https://openalex.org/S4306400425","display_name":"DSpace@MIT (Massachusetts Institute of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I63966007","host_organization_name":"Massachusetts Institute of Technology","host_organization_lineage":["https://openalex.org/I63966007"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv","raw_type":"Article"},{"id":"pmh:oai:dspace.mit.edu:1721.1/136715.2","is_oa":true,"landing_page_url":"https://hdl.handle.net/1721.1/136715.2","pdf_url":null,"source":{"id":"https://openalex.org/S4306400425","display_name":"DSpace@MIT (Massachusetts Institute of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I63966007","host_organization_name":"Massachusetts Institute of Technology","host_organization_lineage":["https://openalex.org/I63966007"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv","raw_type":"Article"},{"id":"doi:10.48550/arxiv.1909.05004","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1909.05004","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1909.05004","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1909.05004","pdf_url":"https://arxiv.org/pdf/1909.05004","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2972509658.pdf","grobid_xml":"https://content.openalex.org/works/W2972509658.grobid-xml"},"referenced_works_count":30,"referenced_works":["https://openalex.org/W194786220","https://openalex.org/W1502922572","https://openalex.org/W1590206506","https://openalex.org/W2058192020","https://openalex.org/W2076337359","https://openalex.org/W2101234009","https://openalex.org/W2102660061","https://openalex.org/W2117675763","https://openalex.org/W2121863487","https://openalex.org/W2141481921","https://openalex.org/W2145339207","https://openalex.org/W2145756561","https://openalex.org/W2155027007","https://openalex.org/W2156174663","https://openalex.org/W2161767008","https://openalex.org/W2182124586","https://openalex.org/W2186820913","https://openalex.org/W2540189295","https://openalex.org/W2625366419","https://openalex.org/W3103262232","https://openalex.org/W4285719527","https://openalex.org/W6629804754","https://openalex.org/W6675354045","https://openalex.org/W6677555207","https://openalex.org/W6681205794","https://openalex.org/W6683204974","https://openalex.org/W6683852811","https://openalex.org/W6685793570","https://openalex.org/W6780559895","https://openalex.org/W6845059051"],"related_works":["https://openalex.org/W3089424104","https://openalex.org/W2961186683","https://openalex.org/W2741991979","https://openalex.org/W2888519432","https://openalex.org/W2158969944","https://openalex.org/W2031067035","https://openalex.org/W2117626647","https://openalex.org/W2753088790","https://openalex.org/W3109669325","https://openalex.org/W3131283938","https://openalex.org/W2984409990","https://openalex.org/W1621708194","https://openalex.org/W2562989799","https://openalex.org/W52822972","https://openalex.org/W3202097587","https://openalex.org/W2156578004","https://openalex.org/W2156163138","https://openalex.org/W2186564083","https://openalex.org/W3106415912","https://openalex.org/W2154549708"],"abstract_inverted_index":{"A":[0,136],"common":[1],"approach":[2],"for":[3,8,52,72,92,106,156,174],"defining":[4],"a":[5,47,119,129,143],"reward":[6,65,125,148],"function":[7,66,71,85,115,126,149,155],"multi-objective":[9],"reinforcement":[10],"learning":[11],"(MORL)":[12],"problems":[13],"is":[14,50,139],"the":[15,19,32,37,40,43,61,64,68,76,82,88,94,102,113,124,133,147,152,168,195],"weighted":[16],"sum":[17],"of":[18,36,78,90,121,123,151,197,203,211],"multiple":[20],"objectives.":[21],"The":[22,164],"weights":[23,91,122,150],"are":[24],"then":[25],"treated":[26],"as":[27],"design":[28],"parameters":[29],"dependent":[30],"on":[31],"expertise":[33],"(and":[34,127],"preference)":[35],"person":[38],"performing":[39],"learning,":[41],"with":[42],"typical":[44],"result":[45],"that":[46,112,167],"new":[48],"solution":[49],"required":[51],"any":[53,107],"change":[54],"in":[55,132,179,194],"these":[56],"settings.":[57],"This":[58],"paper":[59],"investigates":[60],"relationship":[62],"between":[63],"and":[67,162,177,182,208],"optimal":[69,83,153],"value":[70,84,114,154],"MORL;":[73],"specifically":[74],"addressing":[75],"question":[77],"how":[79],"to":[80,104,141],"approximate":[81],"well":[86],"beyond":[87],"set":[89],"which":[93],"optimization":[95],"problem":[96],"was":[97],"actually":[98],"solved,":[99],"thereby":[100],"avoiding":[101],"need":[103],"recompute":[105],"particular":[108],"choice.":[109],"We":[110],"prove":[111],"transforms":[116],"smoothly":[117],"given":[118],"transformation":[120],"thus":[128],"smooth":[130,144],"interpolation":[131,145,169,192],"policy":[134],"space).":[135],"Gaussian":[137],"process":[138],"used":[140],"obtain":[142],"over":[146],"three":[157],"well-known":[158],"examples:":[159],"Gridworld,":[160],"Objectworld":[161],"Pendulum.":[163],"results":[165],"show":[166],"can":[170],"provide":[171],"robust":[172],"values":[173],"sample":[175],"states":[176],"actions":[178],"both":[180],"discrete":[181],"continuous":[183],"domain":[184,196],"problems.":[185],"Significant":[186],"advantages":[187],"arise":[188],"from":[189],"utilizing":[190],"this":[191],"technique":[193],"autonomous":[198],"vehicles:":[199],"easy,":[200],"instant":[201],"adaptation":[202],"user":[204],"preferences":[205,215],"while":[206],"driving":[207],"true":[209],"randomization":[210],"obstacle":[212],"vehicle":[213],"behavior":[214],"during":[216],"training.":[217]},"counts_by_year":[],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
