{"id":"https://openalex.org/W2795688094","doi":"https://doi.org/10.15439/2018f231","title":"Modular Multi-Objective Deep Reinforcement Learning with Decision Values","display_name":"Modular Multi-Objective Deep Reinforcement Learning with Decision Values","publication_year":2018,"publication_date":"2018-09-26","ids":{"openalex":"https://openalex.org/W2795688094","doi":"https://doi.org/10.15439/2018f231","mag":"2795688094"},"language":"en","primary_location":{"id":"doi:10.15439/2018f231","is_oa":true,"landing_page_url":"https://doi.org/10.15439/2018f231","pdf_url":"https://annals-csis.org/proceedings/2018/drp/pdf/231.pdf","source":{"id":"https://openalex.org/S4220651875","display_name":"Annals of Computer Science and Information Systems","issn_l":"2300-5963","issn":["2300-5963"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":"https://openalex.org/P4310317484","host_organization_name":"Polskie Towarzystwo Informatyczne","host_organization_lineage":["https://openalex.org/P4310317484"],"host_organization_lineage_names":["Polskie Towarzystwo Informatyczne"],"type":"conference"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Annals of Computer Science and Information Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://annals-csis.org/proceedings/2018/drp/pdf/231.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5027080461","display_name":"Tomasz Tajmajer","orcid":null},"institutions":[{"id":"https://openalex.org/I4654613","display_name":"University of Warsaw","ror":"https://ror.org/039bjqg32","country_code":"PL","type":"education","lineage":["https://openalex.org/I4654613"]}],"countries":["PL"],"is_corresponding":true,"raw_author_name":"Tomasz Tajmajer","raw_affiliation_strings":["Institute of Informatics, University of Warsaw ul. Banacha 2, 02-097, Warsaw, Poland"],"affiliations":[{"raw_affiliation_string":"Institute of Informatics, University of Warsaw ul. Banacha 2, 02-097, Warsaw, Poland","institution_ids":["https://openalex.org/I4654613"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5027080461"],"corresponding_institution_ids":["https://openalex.org/I4654613"],"apc_list":null,"apc_paid":null,"fwci":1.3095,"has_fulltext":true,"cited_by_count":37,"citation_normalized_percentile":{"value":0.8661859,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"15","issue":null,"first_page":"85","last_page":"93"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10848","display_name":"Advanced Multi-Objective Optimization Algorithms","score":0.9897000193595886,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9721999764442444,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8233833312988281},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.659299373626709},{"id":"https://openalex.org/keywords/modular-design","display_name":"Modular design","score":0.6485933065414429},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.493224561214447},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.4606516361236572},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3706226348876953},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.19018998742103577},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.07531946897506714}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8233833312988281},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.659299373626709},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.6485933065414429},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.493224561214447},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.4606516361236572},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3706226348876953},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.19018998742103577},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.07531946897506714},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.15439/2018f231","is_oa":true,"landing_page_url":"https://doi.org/10.15439/2018f231","pdf_url":"https://annals-csis.org/proceedings/2018/drp/pdf/231.pdf","source":{"id":"https://openalex.org/S4220651875","display_name":"Annals of Computer Science and Information Systems","issn_l":"2300-5963","issn":["2300-5963"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":"https://openalex.org/P4310317484","host_organization_name":"Polskie Towarzystwo Informatyczne","host_organization_lineage":["https://openalex.org/P4310317484"],"host_organization_lineage_names":["Polskie Towarzystwo Informatyczne"],"type":"conference"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Annals of Computer Science and Information Systems","raw_type":"proceedings-article"},{"id":"pmh:oai:doaj.org/article:8cff40237ce14460a3acdfa595239250","is_oa":true,"landing_page_url":"https://doaj.org/article/8cff40237ce14460a3acdfa595239250","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Annals of computer science and information systems, Vol 15, Pp 85-93 (2018)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.15439/2018f231","is_oa":true,"landing_page_url":"https://doi.org/10.15439/2018f231","pdf_url":"https://annals-csis.org/proceedings/2018/drp/pdf/231.pdf","source":{"id":"https://openalex.org/S4220651875","display_name":"Annals of Computer Science and Information Systems","issn_l":"2300-5963","issn":["2300-5963"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":"https://openalex.org/P4310317484","host_organization_name":"Polskie Towarzystwo Informatyczne","host_organization_lineage":["https://openalex.org/P4310317484"],"host_organization_lineage_names":["Polskie Towarzystwo Informatyczne"],"type":"conference"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Annals of Computer Science and Information Systems","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.75}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2795688094.pdf","grobid_xml":"https://content.openalex.org/works/W2795688094.grobid-xml"},"referenced_works_count":23,"referenced_works":["https://openalex.org/W72400652","https://openalex.org/W1515851193","https://openalex.org/W1757796397","https://openalex.org/W2002305926","https://openalex.org/W2012612381","https://openalex.org/W2060846151","https://openalex.org/W2082709276","https://openalex.org/W2115211925","https://openalex.org/W2141481921","https://openalex.org/W2145339207","https://openalex.org/W2173564293","https://openalex.org/W2186820913","https://openalex.org/W2201581102","https://openalex.org/W2530195778","https://openalex.org/W2951799221","https://openalex.org/W2963477884","https://openalex.org/W3103262232","https://openalex.org/W4298857966","https://openalex.org/W4299830491","https://openalex.org/W6637967152","https://openalex.org/W6677067356","https://openalex.org/W6687681856","https://openalex.org/W6728073343"],"related_works":["https://openalex.org/W2045236383","https://openalex.org/W3074294383","https://openalex.org/W2121778218","https://openalex.org/W4206669594","https://openalex.org/W2961085424","https://openalex.org/W2959276766","https://openalex.org/W4295941380","https://openalex.org/W260766989","https://openalex.org/W575330181","https://openalex.org/W4319083788"],"abstract_inverted_index":{"In":[0,66],"this":[1,67],"work":[2],"we":[3,69,125],"present":[4],"a":[5,81,127,143],"method":[6],"for":[7],"using":[8],"Deep":[9,15],"Q-Networks":[10,16],"(DQNs)":[11],"in":[12,20,31,35,50,130,142],"multi-objective":[13],"environments.":[14],"provide":[17],"remarkable":[18],"performance":[19,117],"single":[21,82],"objective":[22],"problems":[23],"learning":[24],"from":[25],"high-level":[26,136],"visual":[27,137],"state":[28],"representations.":[29],"However,":[30],"many":[32],"scenarios":[33],"(e.g":[34],"robotics,":[36],"games),":[37],"the":[38,58,75,87,90,107,115,119],"agent":[39,133],"needs":[40],"to":[41,56,63,73,105],"pursue":[42],"multiple":[43,78,140],"objectives":[44,111,141],"simultaneously.":[45],"We":[46],"propose":[47],"an":[48,132],"architecture":[49,68,85],"which":[51,131],"separate":[52],"DQNs":[53,79],"are":[54],"used":[55,126],"control":[57],"agent's":[59,91],"behaviour":[60,92],"with":[61,135],"respect":[62],"particular":[64,110],"objectives.":[65],"introduce":[70],"decision":[71],"values":[72],"improve":[74],"scalarization":[76],"of":[77,89,109,118],"into":[80,93],"action.":[83],"Our":[84],"enables":[86],"decomposition":[88],"controllable":[94],"and":[95],"replaceable":[96],"sub-behaviours":[97],"learned":[98],"by":[99],"distinct":[100],"modules.":[101],"Moreover,":[102],"it":[103],"allows":[104],"change":[106],"priorities":[108],"post-learning,":[112],"while":[113],"preserving":[114],"overall":[116],"agent.":[120],"To":[121],"evaluate":[122],"our":[123],"solution":[124],"game-like":[128],"simulator":[129],"-provided":[134],"input":[138],"-pursues":[139],"2D":[144],"world.":[145]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":8},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":7},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":7},{"year":2018,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
