{"id":"https://openalex.org/W1975560000","doi":"https://doi.org/10.1080/10798587.2004.10642878","title":"Improved Q<sub>MDP</sub>Policy for Partially Observable Markov Decision Processes in Large Domains: Embedding Exploration Dynamics","display_name":"Improved Q<sub>MDP</sub>Policy for Partially Observable Markov Decision Processes in Large Domains: Embedding Exploration Dynamics","publication_year":2004,"publication_date":"2004-01-01","ids":{"openalex":"https://openalex.org/W1975560000","doi":"https://doi.org/10.1080/10798587.2004.10642878","mag":"1975560000"},"language":"en","primary_location":{"id":"doi:10.1080/10798587.2004.10642878","is_oa":false,"landing_page_url":"https://doi.org/10.1080/10798587.2004.10642878","pdf_url":null,"source":{"id":"https://openalex.org/S40639465","display_name":"Intelligent Automation & Soft Computing","issn_l":"1079-8587","issn":["1079-8587","2326-005X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320547","host_organization_name":"Taylor & Francis","host_organization_lineage":["https://openalex.org/P4310320547"],"host_organization_lineage_names":["Taylor & Francis"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Intelligent Automation &amp; Soft Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031676252","display_name":"Giorgos Apostolikas","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Giorgos Apostolikas","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5085942072","display_name":"Spyros G. Tzafesta\u015f","orcid":"https://orcid.org/0000-0002-9700-9313"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Spyros Tzafestas","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5031676252"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.4497,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.73776998,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"10","issue":"3","first_page":"209","last_page":"220"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9941999912261963,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9941999912261963,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10906","display_name":"AI-based Problem Solving and Planning","score":0.993399977684021,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/partially-observable-markov-decision-process","display_name":"Partially observable Markov decision process","score":0.9484527111053467},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8091486692428589},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.7921225428581238},{"id":"https://openalex.org/keywords/observable","display_name":"Observable","score":0.7454233169555664},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.7372725009918213},{"id":"https://openalex.org/keywords/action-selection","display_name":"Action selection","score":0.6281051635742188},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.6187653541564941},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5690006017684937},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5259647369384766},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.4930398166179657},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.4894522726535797},{"id":"https://openalex.org/keywords/markov-chain","display_name":"Markov chain","score":0.4251802861690521},{"id":"https://openalex.org/keywords/decision-process","display_name":"Decision process","score":0.4246410131454468},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.40165963768959045},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.384446918964386},{"id":"https://openalex.org/keywords/markov-model","display_name":"Markov model","score":0.25603899359703064},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.24681228399276733},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.22325152158737183},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.16581964492797852},{"id":"https://openalex.org/keywords/management-science","display_name":"Management science","score":0.15031665563583374}],"concepts":[{"id":"https://openalex.org/C17098449","wikidata":"https://www.wikidata.org/wiki/Q176814","display_name":"Partially observable Markov decision process","level":4,"score":0.9484527111053467},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8091486692428589},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.7921225428581238},{"id":"https://openalex.org/C32848918","wikidata":"https://www.wikidata.org/wiki/Q845789","display_name":"Observable","level":2,"score":0.7454233169555664},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.7372725009918213},{"id":"https://openalex.org/C166109690","wikidata":"https://www.wikidata.org/wiki/Q4677422","display_name":"Action selection","level":3,"score":0.6281051635742188},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.6187653541564941},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5690006017684937},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5259647369384766},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.4930398166179657},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.4894522726535797},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.4251802861690521},{"id":"https://openalex.org/C2984634286","wikidata":"https://www.wikidata.org/wiki/Q1331926","display_name":"Decision process","level":2,"score":0.4246410131454468},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.40165963768959045},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.384446918964386},{"id":"https://openalex.org/C163836022","wikidata":"https://www.wikidata.org/wiki/Q6771326","display_name":"Markov model","level":3,"score":0.25603899359703064},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.24681228399276733},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.22325152158737183},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.16581964492797852},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.15031665563583374},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1080/10798587.2004.10642878","is_oa":false,"landing_page_url":"https://doi.org/10.1080/10798587.2004.10642878","pdf_url":null,"source":{"id":"https://openalex.org/S40639465","display_name":"Intelligent Automation & Soft Computing","issn_l":"1079-8587","issn":["1079-8587","2326-005X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320547","host_organization_name":"Taylor & Francis","host_organization_lineage":["https://openalex.org/P4310320547"],"host_organization_lineage_names":["Taylor & Francis"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Intelligent Automation &amp; Soft Computing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.6100000143051147}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W95627046","https://openalex.org/W180325379","https://openalex.org/W1491843047","https://openalex.org/W1491973539","https://openalex.org/W1515891729","https://openalex.org/W1539216098","https://openalex.org/W1553175366","https://openalex.org/W1563317173","https://openalex.org/W1563965851","https://openalex.org/W1587799697","https://openalex.org/W1657674574","https://openalex.org/W1674439172","https://openalex.org/W1831612509","https://openalex.org/W2028145673","https://openalex.org/W2119567691","https://openalex.org/W2334782222","https://openalex.org/W6631000544","https://openalex.org/W6637113308","https://openalex.org/W6704298589","https://openalex.org/W6775686901"],"related_works":["https://openalex.org/W2999848267","https://openalex.org/W2096013579","https://openalex.org/W1589140671","https://openalex.org/W1760611253","https://openalex.org/W52153049","https://openalex.org/W2951545791","https://openalex.org/W1515117609","https://openalex.org/W2294884454","https://openalex.org/W4323315247","https://openalex.org/W131709709"],"abstract_inverted_index":{"Abstract":[0],"Artificial":[1],"Intelligence":[2],"techniques":[3],"were":[4],"primarily":[5],"focused":[6],"on":[7],"domains":[8,25,108],"in":[9,51,81],"which":[10,82,151],"at":[11],"each":[12],"time":[13],"the":[14,17,22,60,78,83,87,94,99,134,140,157],"state":[15,85,120],"of":[16,56,62,70,86,102,113,124,156],"world":[18,53],"is":[19,89],"known":[20],"to":[21,107,138],"system.":[23],"Such":[24],"can":[26],"be":[27,139],"modeled":[28],"as":[29],"a":[30,110,122,144],"Markov":[31],"Decision":[32,74],"Process":[33],"(MDP).":[34],"Action":[35],"and":[36,45,131],"planning":[37],"policies":[38],"for":[39,59,98],"MDPs":[40],"have":[41],"been":[42],"studied":[43],"extensively":[44],"several":[46],"efficient":[47],"methods":[48,125],"exist.":[49],"However,":[50],"real":[52],"problems":[54],"pieces":[55],"information":[57],"useful":[58],"process":[61],"action":[63],"selection":[64],"are":[65,104],"often":[66],"missing.":[67],"The":[68],"theory":[69],"Partially":[71],"Observable":[72],"Mazkov":[73],"Processes":[75],"(POMDP\u2019s)":[76],"covers":[77],"problem":[79],"domain":[80],"full":[84],"environment":[88],"not":[90],"directly":[91],"perceivable":[92],"by":[93],"agent.":[95],"Current":[96],"algorithms":[97],"exact":[100],"solution":[101],"POMDP\u2019s":[103],"only":[105],"applicable":[106],"with":[109,117],"small":[111],"number":[112,123],"states.":[114],"To":[115],"cope":[116],"more":[118],"extended":[119],"spaces,":[121],"that":[126],"achieve":[127],"sub-optimal":[128],"solutions":[129],"exist":[130],"among":[132],"these":[133],"QI,IDP":[135],"approach":[136],"seems":[137],"best.":[141],"We":[142],"introduce":[143],"novel":[145],"technique,":[146],"called":[147],"Explorative":[148],"[Qtilde]P":[149,158],"(EQI-IDP)":[150],"constitutes":[152],"an":[153],"important":[154],"enhancement":[155],"...":[159]},"counts_by_year":[{"year":2017,"cited_by_count":2},{"year":2014,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
