{"id":"https://openalex.org/W7139948117","doi":"https://doi.org/10.1016/j.procs.2026.01.110","title":"A Multi-Objective Reinforcement Learning Approach to Prompt Optimization in NLP","display_name":"A Multi-Objective Reinforcement Learning Approach to Prompt Optimization in NLP","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W7139948117","doi":"https://doi.org/10.1016/j.procs.2026.01.110"},"language":"en","primary_location":{"id":"doi:10.1016/j.procs.2026.01.110","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.procs.2026.01.110","pdf_url":null,"source":{"id":"https://openalex.org/S120348307","display_name":"Procedia Computer Science","issn_l":"1877-0509","issn":["1877-0509"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Procedia Computer Science","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1016/j.procs.2026.01.110","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130228454","display_name":"Muhammad Junaid Iqbal","orcid":null},"institutions":[{"id":"https://openalex.org/I116067653","display_name":"University of Rome Tor Vergata","ror":"https://ror.org/02p77k626","country_code":"IT","type":"education","lineage":["https://openalex.org/I116067653"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Muhammad Junaid Iqbal","raw_affiliation_strings":["Department of Enterprise Engineering, University of Roma tor Vergata, Rome, 00133, Italy"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Enterprise Engineering, University of Roma tor Vergata, Rome, 00133, Italy","institution_ids":["https://openalex.org/I116067653"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130251865","display_name":"Muhammad Asghar Khan","orcid":null},"institutions":[{"id":"https://openalex.org/I138564716","display_name":"Prince Mohammad bin Fahd University","ror":"https://ror.org/03d64na34","country_code":"SA","type":"education","lineage":["https://openalex.org/I138564716"]}],"countries":["SA"],"is_corresponding":false,"raw_author_name":"Muhammad Asghar Khan","raw_affiliation_strings":["Humanities and Social Sciences Department, College of Sciences and Human Studies, Prince Mohammad Bin Fahd University, Al Khobar, Saudi Arabia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Humanities and Social Sciences Department, College of Sciences and Human Studies, Prince Mohammad Bin Fahd University, Al Khobar, Saudi Arabia","institution_ids":["https://openalex.org/I138564716"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130228743","display_name":"Tahir Alyas","orcid":null},"institutions":[{"id":"https://openalex.org/I4210105419","display_name":"Lahore Garrison University","ror":"https://ror.org/01j4ba358","country_code":"PK","type":"education","lineage":["https://openalex.org/I4210105419"]}],"countries":["PK"],"is_corresponding":true,"raw_author_name":"Tahir Alyas","raw_affiliation_strings":["Department of Computer Science, Lahore Garrison University, Lahore, 54000, Pakistan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Lahore Garrison University, Lahore, 54000, Pakistan","institution_ids":["https://openalex.org/I4210105419"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130241042","display_name":"Sagheer Abbas","orcid":null},"institutions":[{"id":"https://openalex.org/I138564716","display_name":"Prince Mohammad bin Fahd University","ror":"https://ror.org/03d64na34","country_code":"SA","type":"education","lineage":["https://openalex.org/I138564716"]}],"countries":["SA"],"is_corresponding":false,"raw_author_name":"Sagheer Abbas","raw_affiliation_strings":["Department of Computer Science, Prince Mohammad bin Fahd University, Al khobar, Saudi Arabia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Prince Mohammad bin Fahd University, Al khobar, Saudi Arabia","institution_ids":["https://openalex.org/I138564716"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130251818","display_name":"Arif Jawaid","orcid":null},"institutions":[{"id":"https://openalex.org/I4210105419","display_name":"Lahore Garrison University","ror":"https://ror.org/01j4ba358","country_code":"PK","type":"education","lineage":["https://openalex.org/I4210105419"]}],"countries":["PK"],"is_corresponding":false,"raw_author_name":"Arif Jawaid","raw_affiliation_strings":["Faculty of Languages, Lahore Garrison University, Lahore, 54000, Pakistan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Languages, Lahore Garrison University, Lahore, 54000, Pakistan","institution_ids":["https://openalex.org/I4210105419"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5029654514","display_name":"Fabio Massimo Zanzotto","orcid":"https://orcid.org/0000-0002-7301-3596"},"institutions":[{"id":"https://openalex.org/I116067653","display_name":"University of Rome Tor Vergata","ror":"https://ror.org/02p77k626","country_code":"IT","type":"education","lineage":["https://openalex.org/I116067653"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Fabio Massimo Zanzotto","raw_affiliation_strings":["Department of Enterprise Engineering, University of Roma tor Vergata, Rome, 00133, Italy"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Enterprise Engineering, University of Roma tor Vergata, Rome, 00133, Italy","institution_ids":["https://openalex.org/I116067653"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5130228743"],"corresponding_institution_ids":["https://openalex.org/I4210105419"],"apc_list":null,"apc_paid":null,"fwci":29.4122,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.993108,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":"275","issue":null,"first_page":"966","last_page":"974"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.2976999878883362,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.2976999878883362,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11010","display_name":"Logic, Reasoning, and Knowledge","score":0.04879999905824661,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10906","display_name":"AI-based Problem Solving and Planning","score":0.03920000046491623,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6832000017166138},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.265500009059906},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.26499998569488525},{"id":"https://openalex.org/keywords/active-learning","display_name":"Active learning (machine learning)","score":0.2337999939918518},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.23340000212192535}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9071999788284302},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7131999731063843},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6832000017166138},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5701000094413757},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.41690000891685486},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.265500009059906},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.26499998569488525},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.2337999939918518},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.23340000212192535},{"id":"https://openalex.org/C2987595161","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Optimization algorithm","level":2,"score":0.21850000321865082}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1016/j.procs.2026.01.110","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.procs.2026.01.110","pdf_url":null,"source":{"id":"https://openalex.org/S120348307","display_name":"Procedia Computer Science","issn_l":"1877-0509","issn":["1877-0509"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Procedia Computer Science","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1016/j.procs.2026.01.110","is_oa":true,"landing_page_url":"https://doi.org/10.1016/j.procs.2026.01.110","pdf_url":null,"source":{"id":"https://openalex.org/S120348307","display_name":"Procedia Computer Science","issn_l":"1877-0509","issn":["1877-0509"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320990","host_organization_name":"Elsevier BV","host_organization_lineage":["https://openalex.org/P4310320990"],"host_organization_lineage_names":["Elsevier BV"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Procedia Computer Science","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W3185369679","https://openalex.org/W3198377975","https://openalex.org/W4389869683","https://openalex.org/W4390511900","https://openalex.org/W4403842724","https://openalex.org/W4405746966","https://openalex.org/W4406311999","https://openalex.org/W4406421219","https://openalex.org/W4406580101","https://openalex.org/W4407155551","https://openalex.org/W4409907585","https://openalex.org/W4410160438","https://openalex.org/W4411638763","https://openalex.org/W4413122229","https://openalex.org/W4413223923","https://openalex.org/W4414908495","https://openalex.org/W4415404858"],"related_works":[],"abstract_inverted_index":{"Prompt":[0],"sensitivity":[1],"remains":[2],"a":[3,30,56,105,126],"major":[4],"challenge":[5],"in":[6,40,130,155,168,176,184],"Large":[7],"Language":[8],"Models":[9],"(LLMs)":[10],"such":[11],"as":[12],"GPT-4,":[13],"Qwen,":[14],"DeepSeek,":[15],"and":[16,47,63,70,90,111,134,152,163,179,192],"Llama,":[17],"often":[18],"leading":[19],"to":[20,81,117,195,204],"inconsistent,":[21],"biased,":[22],"or":[23],"excessively":[24],"long":[25],"outputs.":[26],"This":[27],"paper":[28],"presents":[29],"Multi-Objective":[31],"Reinforcement":[32],"Learning":[33],"(MORL)":[34],"system,":[35],"which":[36],"optimizes":[37],"prompts":[38],"automatically":[39],"terms":[41],"of":[42,49,61,75],"accuracy,":[43],"fairness,":[44],"robustness,":[45],"informativeness,":[46],"efficiency":[48],"tokens.":[50],"The":[51,121],"system":[52],"is":[53,102,123,200],"conditioned":[54],"on":[55,143,149],"2520-sample":[57],"culturally":[58,205],"diverse":[59],"dataset":[60],"religious":[62],"non-religious":[64],"sentence":[65,177],"pairs,":[66],"four":[67],"semantic":[68],"dimensions,":[69],"different":[71],"countries.":[72],"Statistical":[73],"analysis":[74],"the":[76,87,94,112,115,144,170,186],"dimension-specific":[77],"variability":[78],"was":[79,166],"found":[80],"be":[82],"high,":[83],"with":[84,125],"food":[85],"having":[86,93],"highest":[88],"bias":[89,96],"time":[91],"representation":[92],"lowest":[95],"driving":[97],"reward":[98,129],"shaping.":[99],"Every":[100],"encounter":[101],"coded":[103],"into":[104],"rich":[106],"linguistic":[107],"contextual":[108],"state":[109],"vector,":[110],"information":[113],"allows":[114],"agent":[116],"learn":[118],"corrective":[119],"strategies.":[120],"optimization":[122],"done":[124],"weighted":[127],"multi-objective":[128],"Deep":[131],"Q-Network":[132],"(DQN)":[133],"Proximal":[135],"Policy":[136],"Optimization":[137],"(PPO).":[138],"Although":[139],"DQN":[140],"narrowly":[141],"converged":[142],"rewards,":[145,160],"PPO":[146,165],"performed":[147],"better":[148],"both":[150],"validation":[151],"test":[153],"sets":[154],"all":[156],"aspects,":[157],"including":[158],"higher":[159],"action":[161],"diversity,":[162],"generalization.":[164],"effective":[167,202],"reducing":[169],"high-bias":[171],"cases,":[172],"provided":[173],"more":[174,189],"balance":[175],"sentiments,":[178],"stabilized":[180],"interdimensional":[181],"performance.":[182],"All":[183],"all,":[185],"framework":[187],"provides":[188],"accurate,":[190],"informative,":[191],"consistent":[193],"responses":[194],"LLM,":[196],"showing":[197],"that":[198],"MORL":[199],"an":[201],"solution":[203],"sensitive":[206],"prompt":[207],"optimization.":[208]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-05-10T08:33:47.465468","created_date":"2026-03-21T00:00:00"}
