{"id":"https://openalex.org/W4410228934","doi":"https://doi.org/10.1109/wcnc61545.2025.10978817","title":"Cosine Similarity Based Adaptive Implicit Q-Learning for Offline Reinforcement Learning","display_name":"Cosine Similarity Based Adaptive Implicit Q-Learning for Offline Reinforcement Learning","publication_year":2025,"publication_date":"2025-03-24","ids":{"openalex":"https://openalex.org/W4410228934","doi":"https://doi.org/10.1109/wcnc61545.2025.10978817"},"language":"en","primary_location":{"id":"doi:10.1109/wcnc61545.2025.10978817","is_oa":false,"landing_page_url":"https://doi.org/10.1109/wcnc61545.2025.10978817","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Wireless Communications and Networking Conference (WCNC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5055933688","display_name":"Xinchen Han","orcid":"https://orcid.org/0009-0007-8934-9223"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Xinchen Han","raw_affiliation_strings":["Samovar, T&#x00E9;l&#x00E9;com SudParis Institut Polytechnique de Paris,Paris,France"],"affiliations":[{"raw_affiliation_string":"Samovar, T&#x00E9;l&#x00E9;com SudParis Institut Polytechnique de Paris,Paris,France","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079773668","display_name":"Hossam Afifi","orcid":null},"institutions":[{"id":"https://openalex.org/I4210145102","display_name":"Institut Polytechnique de Paris","ror":"https://ror.org/042tfbd02","country_code":"FR","type":"education","lineage":["https://openalex.org/I4210145102"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Hossam Afifi","raw_affiliation_strings":["Samovar, T&#x00E9;l&#x00E9;com SudParis Institut Polytechnique de Paris,Paris,France","IP Paris - Institut Polytechnique de Paris (Route de Saclay, 91120 Palaiseau Cedex, France - France)"],"affiliations":[{"raw_affiliation_string":"Samovar, T&#x00E9;l&#x00E9;com SudParis Institut Polytechnique de Paris,Paris,France","institution_ids":[]},{"raw_affiliation_string":"IP Paris - Institut Polytechnique de Paris (Route de Saclay, 91120 Palaiseau Cedex, France - France)","institution_ids":["https://openalex.org/I4210145102"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5057498700","display_name":"Michel Marot","orcid":"https://orcid.org/0000-0003-2355-1625"},"institutions":[{"id":"https://openalex.org/I4210145102","display_name":"Institut Polytechnique de Paris","ror":"https://ror.org/042tfbd02","country_code":"FR","type":"education","lineage":["https://openalex.org/I4210145102"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Michel Marot","raw_affiliation_strings":["Samovar, T&#x00E9;l&#x00E9;com SudParis Institut Polytechnique de Paris,Paris,France","IP Paris - Institut Polytechnique de Paris (Route de Saclay, 91120 Palaiseau Cedex, France - France)"],"affiliations":[{"raw_affiliation_string":"Samovar, T&#x00E9;l&#x00E9;com SudParis Institut Polytechnique de Paris,Paris,France","institution_ids":[]},{"raw_affiliation_string":"IP Paris - Institut Polytechnique de Paris (Route de Saclay, 91120 Palaiseau Cedex, France - France)","institution_ids":["https://openalex.org/I4210145102"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5055933688"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.04926319,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9140999913215637,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9140999913215637,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9070000052452087,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8510473966598511},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7022238373756409},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.6383501887321472},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5531545281410217},{"id":"https://openalex.org/keywords/cosine-similarity","display_name":"Cosine similarity","score":0.5486352443695068},{"id":"https://openalex.org/keywords/discrete-cosine-transform","display_name":"Discrete cosine transform","score":0.43407636880874634},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.43367767333984375},{"id":"https://openalex.org/keywords/q-learning","display_name":"Q-learning","score":0.4158513844013214},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.33374491333961487},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.2574329376220703},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09676986932754517},{"id":"https://openalex.org/keywords/structural-engineering","display_name":"Structural engineering","score":0.0689002275466919}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8510473966598511},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7022238373756409},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.6383501887321472},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5531545281410217},{"id":"https://openalex.org/C2780762811","wikidata":"https://www.wikidata.org/wiki/Q1784941","display_name":"Cosine similarity","level":3,"score":0.5486352443695068},{"id":"https://openalex.org/C2221639","wikidata":"https://www.wikidata.org/wiki/Q2877","display_name":"Discrete cosine transform","level":3,"score":0.43407636880874634},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.43367767333984375},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.4158513844013214},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33374491333961487},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.2574329376220703},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09676986932754517},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0689002275466919},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/wcnc61545.2025.10978817","is_oa":false,"landing_page_url":"https://doi.org/10.1109/wcnc61545.2025.10978817","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Wireless Communications and Networking Conference (WCNC)","raw_type":"proceedings-article"},{"id":"pmh:oai:HAL:hal-05314272v1","is_oa":false,"landing_page_url":"https://hal.science/hal-05314272","pdf_url":null,"source":{"id":"https://openalex.org/S4406922461","display_name":"SPIRE - Sciences Po Institutional REpository","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"2025 IEEE Wireless Communications and Networking Conference (WCNC), Mar 2025, Milan, Italy. pp.1-6, &#x27E8;10.1109/WCNC61545.2025.10978817&#x27E9;","raw_type":"Conference papers"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W2109169869","https://openalex.org/W2978455699","https://openalex.org/W4324134792","https://openalex.org/W4360584316","https://openalex.org/W4387816882","https://openalex.org/W4402351158","https://openalex.org/W6677916085","https://openalex.org/W6757469721","https://openalex.org/W6763704811","https://openalex.org/W6779265984","https://openalex.org/W6779656125","https://openalex.org/W6796289742","https://openalex.org/W6796589144","https://openalex.org/W6796926305","https://openalex.org/W6801801719","https://openalex.org/W6802659552","https://openalex.org/W6811554491","https://openalex.org/W6838725491","https://openalex.org/W6846142413","https://openalex.org/W6851312059","https://openalex.org/W6858393028"],"related_works":["https://openalex.org/W2742483371","https://openalex.org/W3096874164","https://openalex.org/W2166117066","https://openalex.org/W3087814763","https://openalex.org/W2357975469","https://openalex.org/W2136202932","https://openalex.org/W4376605461","https://openalex.org/W4400868993","https://openalex.org/W2361647908","https://openalex.org/W2952356279"],"abstract_inverted_index":{"Offline":[0],"Reinforcement":[1],"Learning":[2],"(RL)":[3],"methods":[4],"constrain":[5],"the":[6,11,41,50,88,94,98,101,114,121],"policy":[7,42,89,99],"to":[8],"align":[9],"with":[10],"behaviour":[12,102],"policy,":[13],"mitigating":[14],"extrapolation":[15],"errors":[16],"caused":[17],"by":[18,92],"out-of-distribution":[19],"(OOD)":[20],"actions.":[21,48,125],"Implicit":[22],"Q-Learning":[23],"(IQL),":[24],"a":[25,107],"popular":[26],"offline":[27,109,153],"RL":[28,110,154],"algorithm,":[29],"leverages":[30],"expectile":[31,57],"regression":[32,58],"and":[33,67,100,146],"introduces":[34],"an":[35,78],"in-sample":[36,117],"learning":[37],"paradigm":[38],"that":[39,159],"enhances":[40],"evaluation":[43,90],"stage":[44],"without":[45,136],"querying":[46],"OOD":[47,124],"However,":[49],"crucial":[51],"parameter":[52,132],"<tex":[53,133],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[54,134],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$T$</tex>":[55,135],"for":[56],"in":[59],"IQL":[60,80],"is":[61,105],"fixed,":[62],"limiting":[63],"both":[64],"its":[65,148],"performance":[66,149],"flexibility":[68],"across":[69],"diverse":[70],"datasets.":[71],"In":[72,126],"this":[73],"paper,":[74],"we":[75],"propose":[76],"Cos-IQL,":[77],"improved":[79],"approach":[81],"based":[82],"on":[83,142],"cosine":[84,95],"similarity,":[85],"which":[86],"optimizes":[87],"function":[91],"measuring":[93],"similarity":[96],"between":[97],"policy.":[103],"Cos-IQL":[104,128,141,160],"essentially":[106],"multi-step":[108],"algorithm":[111],"but":[112],"retains":[113],"advantages":[115],"of":[116,123],"learning,":[118],"thus":[119],"avoiding":[120],"risks":[122],"addition,":[127],"can":[129],"adaptively":[130],"adjust":[131],"elaborate":[137],"fine-tuning.":[138],"We":[139],"evaluate":[140],"D4RL":[143],"benchmark":[144],"datasets":[145],"compare":[147],"against":[150],"recent":[151],"competitive":[152],"algorithms.":[155],"Experimental":[156],"results":[157],"show":[158],"achieves":[161],"state-of-the-art":[162],"performance.":[163]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
