{"id":"https://openalex.org/W4416749667","doi":"https://doi.org/10.1109/iros60139.2025.11245919","title":"Integrating Offline Pre-Training with Online Fine-Tuning: A Reinforcement Learning Approach for Robot Social Navigation","display_name":"Integrating Offline Pre-Training with Online Fine-Tuning: A Reinforcement Learning Approach for Robot Social Navigation","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4416749667","doi":"https://doi.org/10.1109/iros60139.2025.11245919"},"language":null,"primary_location":{"id":"doi:10.1109/iros60139.2025.11245919","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11245919","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5057384888","display_name":"Run Su","orcid":"https://orcid.org/0000-0003-1785-673X"},"institutions":[{"id":"https://openalex.org/I43922553","display_name":"Wuhan University of Science and Technology","ror":"https://ror.org/00e4hrk88","country_code":"CN","type":"education","lineage":["https://openalex.org/I43922553"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Run Su","raw_affiliation_strings":["Wuhan University of Science and Technology,School of Computer Science and Technology"],"affiliations":[{"raw_affiliation_string":"Wuhan University of Science and Technology,School of Computer Science and Technology","institution_ids":["https://openalex.org/I43922553"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100604058","display_name":"Hao Fu","orcid":"https://orcid.org/0000-0002-9028-1728"},"institutions":[{"id":"https://openalex.org/I43922553","display_name":"Wuhan University of Science and Technology","ror":"https://ror.org/00e4hrk88","country_code":"CN","type":"education","lineage":["https://openalex.org/I43922553"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hao Fu","raw_affiliation_strings":["Wuhan University of Science and Technology,School of Computer Science and Technology"],"affiliations":[{"raw_affiliation_string":"Wuhan University of Science and Technology,School of Computer Science and Technology","institution_ids":["https://openalex.org/I43922553"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100730526","display_name":"Shuai Zhou","orcid":"https://orcid.org/0000-0003-3450-076X"},"institutions":[{"id":"https://openalex.org/I43922553","display_name":"Wuhan University of Science and Technology","ror":"https://ror.org/00e4hrk88","country_code":"CN","type":"education","lineage":["https://openalex.org/I43922553"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuai Zhou","raw_affiliation_strings":["Wuhan University of Science and Technology,School of Computer Science and Technology"],"affiliations":[{"raw_affiliation_string":"Wuhan University of Science and Technology,School of Computer Science and Technology","institution_ids":["https://openalex.org/I43922553"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5047231419","display_name":"Yinghua Fu","orcid":"https://orcid.org/0000-0002-2059-6847"},"institutions":[{"id":"https://openalex.org/I43922553","display_name":"Wuhan University of Science and Technology","ror":"https://ror.org/00e4hrk88","country_code":"CN","type":"education","lineage":["https://openalex.org/I43922553"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yingao Fu","raw_affiliation_strings":["Wuhan University of Science and Technology,School of Computer Science and Technology"],"affiliations":[{"raw_affiliation_string":"Wuhan University of Science and Technology,School of Computer Science and Technology","institution_ids":["https://openalex.org/I43922553"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5057384888"],"corresponding_institution_ids":["https://openalex.org/I43922553"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.41179622,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"18660","last_page":"18667"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.4207000136375427,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.4207000136375427,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.18019999563694,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11500","display_name":"Evacuation and Crowd Dynamics","score":0.11729999631643295,"subfield":{"id":"https://openalex.org/subfields/2212","display_name":"Ocean Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7260000109672546},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5958999991416931},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.5843999981880188},{"id":"https://openalex.org/keywords/pedestrian","display_name":"Pedestrian","score":0.5224999785423279},{"id":"https://openalex.org/keywords/social-force-model","display_name":"Social force model","score":0.44749999046325684},{"id":"https://openalex.org/keywords/mobile-robot","display_name":"Mobile robot","score":0.4047999978065491},{"id":"https://openalex.org/keywords/motion-planning","display_name":"Motion planning","score":0.3675999939441681},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.3508000075817108}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7260000109672546},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6791999936103821},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5958999991416931},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.5843999981880188},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.559499979019165},{"id":"https://openalex.org/C2777113093","wikidata":"https://www.wikidata.org/wiki/Q221488","display_name":"Pedestrian","level":2,"score":0.5224999785423279},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.459199994802475},{"id":"https://openalex.org/C2779293179","wikidata":"https://www.wikidata.org/wiki/Q465266","display_name":"Social force model","level":3,"score":0.44749999046325684},{"id":"https://openalex.org/C19966478","wikidata":"https://www.wikidata.org/wiki/Q4810574","display_name":"Mobile robot","level":3,"score":0.4047999978065491},{"id":"https://openalex.org/C81074085","wikidata":"https://www.wikidata.org/wiki/Q366872","display_name":"Motion planning","level":3,"score":0.3675999939441681},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.3508000075817108},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.349700003862381},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.34929999709129333},{"id":"https://openalex.org/C26990112","wikidata":"https://www.wikidata.org/wiki/Q6887224","display_name":"Mobile robot navigation","level":5,"score":0.32899999618530273},{"id":"https://openalex.org/C2986087404","wikidata":"https://www.wikidata.org/wiki/Q15946010","display_name":"Online learning","level":2,"score":0.3138999938964844},{"id":"https://openalex.org/C2780102126","wikidata":"https://www.wikidata.org/wiki/Q10928179","display_name":"Online and offline","level":2,"score":0.30869999527931213},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.30390000343322754},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.2865999937057495},{"id":"https://openalex.org/C2780490138","wikidata":"https://www.wikidata.org/wiki/Q7079636","display_name":"Offline learning","level":3,"score":0.27459999918937683},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.27230000495910645},{"id":"https://openalex.org/C145460709","wikidata":"https://www.wikidata.org/wiki/Q859951","display_name":"Human\u2013robot interaction","level":3,"score":0.2632000148296356},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2540000081062317},{"id":"https://openalex.org/C2777891301","wikidata":"https://www.wikidata.org/wiki/Q3475123","display_name":"Navigation system","level":2,"score":0.25360000133514404},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.25209999084472656}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros60139.2025.11245919","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11245919","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W192919555","https://openalex.org/W2082585576","https://openalex.org/W2142943472","https://openalex.org/W2604216058","https://openalex.org/W2890001928","https://openalex.org/W2963809389","https://openalex.org/W2963821308","https://openalex.org/W2964319688","https://openalex.org/W3005857605","https://openalex.org/W3090345358","https://openalex.org/W3205400264"],"related_works":[],"abstract_inverted_index":{"Offline":[0],"reinforcement":[1],"learning":[2],"(RL)":[3],"has":[4],"emerged":[5],"as":[6],"a":[7,49,65,72,112,147],"promising":[8],"framework":[9,98],"for":[10,55,179],"addressing":[11],"robot":[12,56],"social":[13,57,139],"navigation":[14,58,140,169,185],"challenges.":[15],"However,":[16],"inherent":[17],"uncertainties":[18],"in":[19,82,137,167,187],"pedestrian":[20,88],"behavior":[21],"and":[22,33,39,91,132,151,172,182],"limited":[23],"environmental":[24,109],"interaction":[25],"during":[26,124],"training":[27,38,106],"often":[28],"lead":[29],"to":[30,77,120,156],"suboptimal":[31],"exploration":[32],"distributional":[34],"shifts":[35],"between":[36],"offline":[37,104],"online":[40,108],"deployment.":[41],"To":[42],"overcome":[43],"these":[44],"limitations,":[45],"this":[46],"paper":[47],"proposes":[48],"novel":[50],"offline-to-online":[51],"fine-tuning":[52],"RL":[53],"algorithm":[54,70,166],"by":[59,84,102],"integrating":[60],"Return-to-Go":[61],"(RTG)":[62],"prediction":[63,97],"into":[64],"causal":[66],"Transformer":[67],"architecture.":[68],"Our":[69],"features":[71],"spatiotemporal":[73],"fusion":[74],"model":[75],"designed":[76],"precisely":[78],"estimate":[79],"RTG":[80,96],"values":[81],"real-time":[83,133],"jointly":[85],"encoding":[86],"temporal":[87],"motion":[89],"patterns":[90],"spatial":[92],"crowd":[93],"dynamics.":[94],"This":[95,174],"mitigates":[99],"distribution":[100],"shift":[101],"aligning":[103],"policy":[105,122,170],"with":[107],"interactions.":[110],"Furthermore,":[111],"hybrid":[113],"offline-online":[114],"experience":[115],"sampling":[116],"mechanism":[117],"is":[118],"built":[119],"stabilize":[121],"updates":[123],"fine-tuning,":[125],"ensuring":[126],"balanced":[127],"integration":[128],"of":[129,164],"pre-trained":[130],"knowledge":[131],"adaptation.":[134],"Extensive":[135],"experiments":[136],"simulated":[138],"environments":[141],"demonstrate":[142],"that":[143],"our":[144,165],"method":[145],"achieves":[146],"higher":[148],"success":[149],"rate":[150,154],"lower":[152],"collision":[153],"compared":[155],"state-of-the-art":[157],"baselines.":[158],"These":[159],"results":[160],"underscore":[161],"the":[162,177],"efficacy":[163],"enhancing":[168],"robustness":[171],"adaptability.":[173],"work":[175],"paves":[176],"way":[178],"more":[180],"reliable":[181],"adaptive":[183],"robotic":[184],"systems":[186],"real-world":[188],"applications.":[189]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-11-28T00:00:00"}
