{"id":"https://openalex.org/W1967706693","doi":"https://doi.org/10.1109/adprl.2014.7010618","title":"Policy gradient approaches for multi-objective sequential decision making: A comparison","display_name":"Policy gradient approaches for multi-objective sequential decision making: A comparison","publication_year":2014,"publication_date":"2014-12-01","ids":{"openalex":"https://openalex.org/W1967706693","doi":"https://doi.org/10.1109/adprl.2014.7010618","mag":"1967706693"},"language":"en","primary_location":{"id":"doi:10.1109/adprl.2014.7010618","is_oa":false,"landing_page_url":"https://doi.org/10.1109/adprl.2014.7010618","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5055633714","display_name":"Simone Parisi","orcid":"https://orcid.org/0000-0003-3886-8131"},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Simone Parisi","raw_affiliation_strings":["Department of Electronics, Information and Bioengineering, Politecnico di Milano, Milan, Italy","Department of Electronics, Information and Bioengineering, Politecnico di Milano, Piazza Leonardo da Vinci, 32, 20133 Milan, Italy"],"affiliations":[{"raw_affiliation_string":"Department of Electronics, Information and Bioengineering, Politecnico di Milano, Milan, Italy","institution_ids":["https://openalex.org/I93860229"]},{"raw_affiliation_string":"Department of Electronics, Information and Bioengineering, Politecnico di Milano, Piazza Leonardo da Vinci, 32, 20133 Milan, Italy","institution_ids":["https://openalex.org/I93860229"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091526684","display_name":"Matteo Pirotta","orcid":null},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Matteo Pirotta","raw_affiliation_strings":["Department of Electronics, Information and Bioengineering, Politecnico di Milano, Milan, Italy","Department of Electronics, Information and Bioengineering, Politecnico di Milano, Piazza Leonardo da Vinci, 32, 20133 Milan, Italy"],"affiliations":[{"raw_affiliation_string":"Department of Electronics, Information and Bioengineering, Politecnico di Milano, Milan, Italy","institution_ids":["https://openalex.org/I93860229"]},{"raw_affiliation_string":"Department of Electronics, Information and Bioengineering, Politecnico di Milano, Piazza Leonardo da Vinci, 32, 20133 Milan, Italy","institution_ids":["https://openalex.org/I93860229"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005568061","display_name":"Nicola Smacchia","orcid":null},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Nicola Smacchia","raw_affiliation_strings":["Department of Electronics, Information and Bioengineering, Politecnico di Milano, Milan, Italy","Department of Electronics, Information and Bioengineering, Politecnico di Milano, Piazza Leonardo da Vinci, 32, 20133 Milan, Italy"],"affiliations":[{"raw_affiliation_string":"Department of Electronics, Information and Bioengineering, Politecnico di Milano, Milan, Italy","institution_ids":["https://openalex.org/I93860229"]},{"raw_affiliation_string":"Department of Electronics, Information and Bioengineering, Politecnico di Milano, Piazza Leonardo da Vinci, 32, 20133 Milan, Italy","institution_ids":["https://openalex.org/I93860229"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089088404","display_name":"Luca Bascetta","orcid":"https://orcid.org/0000-0002-5029-1083"},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Luca Bascetta","raw_affiliation_strings":["Department of Electronics, Information and Bioengineering, Politecnico di Milano, Milan, Italy","Department of Electronics, Information and Bioengineering, Politecnico di Milano, Piazza Leonardo da Vinci, 32, 20133 Milan, Italy"],"affiliations":[{"raw_affiliation_string":"Department of Electronics, Information and Bioengineering, Politecnico di Milano, Milan, Italy","institution_ids":["https://openalex.org/I93860229"]},{"raw_affiliation_string":"Department of Electronics, Information and Bioengineering, Politecnico di Milano, Piazza Leonardo da Vinci, 32, 20133 Milan, Italy","institution_ids":["https://openalex.org/I93860229"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017130830","display_name":"Marcello Restelli","orcid":"https://orcid.org/0000-0002-6322-1076"},"institutions":[{"id":"https://openalex.org/I93860229","display_name":"Politecnico di Milano","ror":"https://ror.org/01nffqt88","country_code":"IT","type":"education","lineage":["https://openalex.org/I93860229"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Marcello Restelli","raw_affiliation_strings":["Department of Electronics, Information and Bioengineering, Politecnico di Milano, Milan, Italy","Department of Electronics, Information and Bioengineering, Politecnico di Milano, Piazza Leonardo da Vinci, 32, 20133 Milan, Italy"],"affiliations":[{"raw_affiliation_string":"Department of Electronics, Information and Bioengineering, Politecnico di Milano, Milan, Italy","institution_ids":["https://openalex.org/I93860229"]},{"raw_affiliation_string":"Department of Electronics, Information and Bioengineering, Politecnico di Milano, Piazza Leonardo da Vinci, 32, 20133 Milan, Italy","institution_ids":["https://openalex.org/I93860229"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5055633714"],"corresponding_institution_ids":["https://openalex.org/I93860229"],"apc_list":null,"apc_paid":null,"fwci":0.3561,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.59461797,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":"319","issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10848","display_name":"Advanced Multi-Objective Optimization Algorithms","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10848","display_name":"Advanced Multi-Objective Optimization Algorithms","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9901000261306763,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11954","display_name":"Energy Efficiency and Management","score":0.9886000156402588,"subfield":{"id":"https://openalex.org/subfields/2105","display_name":"Renewable Energy, Sustainability and the Environment"},"field":{"id":"https://openalex.org/fields/21","display_name":"Energy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.7079821228981018},{"id":"https://openalex.org/keywords/pareto-principle","display_name":"Pareto principle","score":0.6494203209877014},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5945969820022583},{"id":"https://openalex.org/keywords/multi-objective-optimization","display_name":"Multi-objective optimization","score":0.5711532831192017},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.48568853735923767},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.47975385189056396},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.44148558378219604},{"id":"https://openalex.org/keywords/gradient-descent","display_name":"Gradient descent","score":0.43513673543930054},{"id":"https://openalex.org/keywords/gradient-method","display_name":"Gradient method","score":0.4166652262210846},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2889869213104248},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.27784544229507446},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.25999701023101807}],"concepts":[{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.7079821228981018},{"id":"https://openalex.org/C137635306","wikidata":"https://www.wikidata.org/wiki/Q182667","display_name":"Pareto principle","level":2,"score":0.6494203209877014},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5945969820022583},{"id":"https://openalex.org/C68781425","wikidata":"https://www.wikidata.org/wiki/Q2052203","display_name":"Multi-objective optimization","level":2,"score":0.5711532831192017},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.48568853735923767},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.47975385189056396},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.44148558378219604},{"id":"https://openalex.org/C153258448","wikidata":"https://www.wikidata.org/wiki/Q1199743","display_name":"Gradient descent","level":3,"score":0.43513673543930054},{"id":"https://openalex.org/C115680565","wikidata":"https://www.wikidata.org/wiki/Q5977448","display_name":"Gradient method","level":2,"score":0.4166652262210846},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2889869213104248},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.27784544229507446},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.25999701023101807},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/adprl.2014.7010618","is_oa":false,"landing_page_url":"https://doi.org/10.1109/adprl.2014.7010618","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 IEEE Symposium on Adaptive Dynamic Programming and Reinforcement Learning (ADPRL)","raw_type":"proceedings-article"},{"id":"pmh:oai:re.public.polimi.it:11311/960378","is_oa":false,"landing_page_url":"http://hdl.handle.net/11311/960378","pdf_url":null,"source":{"id":"https://openalex.org/S4306400312","display_name":"Virtual Community of Pathological Anatomy (University of Castilla La Mancha)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79189158","host_organization_name":"University of Castilla-La Mancha","host_organization_lineage":["https://openalex.org/I79189158"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.7799999713897705}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W1480799349","https://openalex.org/W1966630946","https://openalex.org/W1985093013","https://openalex.org/W1988210060","https://openalex.org/W2012612381","https://openalex.org/W2044042941","https://openalex.org/W2047437401","https://openalex.org/W2098907614","https://openalex.org/W2102660061","https://openalex.org/W2112964839","https://openalex.org/W2120346334","https://openalex.org/W2125612430","https://openalex.org/W2127107099","https://openalex.org/W2156679542","https://openalex.org/W2797638056","https://openalex.org/W2963758131","https://openalex.org/W3103262232","https://openalex.org/W4214717370","https://openalex.org/W4292691288","https://openalex.org/W6628908457","https://openalex.org/W6647429866","https://openalex.org/W6676898700","https://openalex.org/W6677737365","https://openalex.org/W6694904549"],"related_works":["https://openalex.org/W1988210060","https://openalex.org/W2558610617","https://openalex.org/W1969982063","https://openalex.org/W2096554690","https://openalex.org/W2009632801","https://openalex.org/W1980405870","https://openalex.org/W2093818238","https://openalex.org/W2134152443","https://openalex.org/W1963550655","https://openalex.org/W2111550856","https://openalex.org/W2969103410","https://openalex.org/W2147968712","https://openalex.org/W2001839237","https://openalex.org/W2166949966","https://openalex.org/W2081211833","https://openalex.org/W2137328093","https://openalex.org/W2062166073","https://openalex.org/W2169195544","https://openalex.org/W2565362795","https://openalex.org/W2175113417"],"abstract_inverted_index":{"This":[0],"paper":[1],"investigates":[2],"the":[3,11,21,27,57,104,122,135,142,150,161,166],"use":[4,58],"of":[5,23,59,100,121,127,144,152],"policy":[6,89,136],"gradient":[7,60,143],"techniques":[8],"to":[9,36,56,62,160],"approximate":[10],"Pareto":[12,83,123,162],"frontier":[13],"in":[14,43,134],"Multi-Objective":[15,70],"Markov":[16],"Decision":[17],"Processes":[18],"(MOMDPs).":[19],"Despite":[20],"popularity":[22],"policy-gradient":[24],"algorithms":[25,31,182],"and":[26,82,90,170],"fact":[28],"that":[29,130,149],"gradient-ascent":[30,110],"have":[32],"been":[33,54],"already":[34],"proposed":[35],"numerically":[37],"solve":[38],"multi-objective":[39,46,64],"optimization":[40],"problems,":[41],"especially":[42],"combination":[44],"with":[45,179],"evolutionary":[47],"algorithms,":[48],"so":[49,148],"far":[50],"little":[51],"attention":[52],"has":[53],"paid":[55],"information":[61],"face":[63],"sequential":[65],"decision":[66],"problems.":[67],"Three":[68],"different":[69,168],"Reinforcement-Learning":[71],"(MORL)":[72],"approaches":[73,169],"are":[74,139],"here":[75],"presented.":[76],"The":[77,125],"first":[78],"two,":[79],"called":[80],"radial":[81],"following,":[84],"start":[85],"from":[86],"an":[87,117],"initial":[88],"perform":[91],"gradient-based":[92],"policy-search":[93],"procedures":[94],"aimed":[95],"at":[96,113],"finding":[97],"a":[98,108,128,132],"set":[99],"non-dominated":[101],"policies.":[102],"Differently,":[103],"third":[105],"approach":[106],"performs":[107],"single":[109],"run":[111],"that,":[112],"each":[114],"step,":[115],"generates":[116],"improved":[118],"continuous":[119],"approximation":[120],"frontier.":[124],"parameters":[126],"function":[129],"defines":[131],"manifold":[133],"parameter":[137],"space":[138],"updated":[140],"following":[141],"some":[145],"performance":[146],"criterion":[147],"sequence":[151],"candidate":[153],"solutions":[154],"gets":[155],"as":[156,158],"close":[157],"possible":[159],"front.":[163],"Besides":[164],"reviewing":[165],"three":[167],"discussing":[171],"their":[172],"main":[173],"properties,":[174],"we":[175],"empirically":[176],"compare":[177],"them":[178],"other":[180],"MORL":[181],"on":[183],"two":[184],"interesting":[185],"MOMDPs.":[186]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2018,"cited_by_count":1},{"year":2015,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
