{"id":"https://openalex.org/W4416252037","doi":"https://doi.org/10.1109/ijcnn64981.2025.11228172","title":"PromptMetric: Prompt Recipe as an Automatic Metric for Evaluating Open-domain Question Answering Systems","display_name":"PromptMetric: Prompt Recipe as an Automatic Metric for Evaluating Open-domain Question Answering Systems","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4416252037","doi":"https://doi.org/10.1109/ijcnn64981.2025.11228172"},"language":null,"primary_location":{"id":"doi:10.1109/ijcnn64981.2025.11228172","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11228172","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Pengzhe Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Pengzhe Wang","raw_affiliation_strings":["University of Science and Technology of China,School of Cyber Science and Technology,Hefei,China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,School of Cyber Science and Technology,Hefei,China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101901622","display_name":"Xin Zeng","orcid":"https://orcid.org/0009-0000-6036-7320"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xin Zeng","raw_affiliation_strings":["University of Science and Technology of China,School of Cyber Science and Technology,Hefei,China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,School of Cyber Science and Technology,Hefei,China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113053510","display_name":"Chiwei Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chiwei Zhu","raw_affiliation_strings":["University of Science and Technology of China,School of Cyber Science and Technology,Hefei,China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,School of Cyber Science and Technology,Hefei,China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085563247","display_name":"Benfeng Xu","orcid":"https://orcid.org/0000-0003-0976-1634"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Benfeng Xu","raw_affiliation_strings":["University of Science and Technology of China,School of Cyber Science and Technology,Hefei,China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,School of Cyber Science and Technology,Hefei,China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023341829","display_name":"Zhendong Mao","orcid":"https://orcid.org/0000-0001-5739-8126"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhendong Mao","raw_affiliation_strings":["University of Science and Technology of China,School of Cyber Science and Technology,Hefei,China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,School of Cyber Science and Technology,Hefei,China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046305086","display_name":"Yongdong Zhang","orcid":"https://orcid.org/0000-0002-1151-1792"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongdong Zhang","raw_affiliation_strings":["University of Science and Technology of China,School of Cyber Science and Technology,Hefei,China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China,School of Cyber Science and Technology,Hefei,China","institution_ids":["https://openalex.org/I126520041"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I126520041"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.18068972,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"9"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9014000296592712,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9014000296592712,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.027400000020861626,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13274","display_name":"Expert finding and Q&A systems","score":0.014499999582767487,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.7070000171661377},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6238999962806702},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.5708000063896179},{"id":"https://openalex.org/keywords/recipe","display_name":"Recipe","score":0.5376999974250793},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.5045999884605408},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4787999987602234},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.39239999651908875},{"id":"https://openalex.org/keywords/evaluation-methods","display_name":"Evaluation methods","score":0.3605000078678131}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7900999784469604},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.7070000171661377},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6238999962806702},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5770999789237976},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.5708000063896179},{"id":"https://openalex.org/C2778671685","wikidata":"https://www.wikidata.org/wiki/Q219239","display_name":"Recipe","level":2,"score":0.5376999974250793},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5073999762535095},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.5045999884605408},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4837999939918518},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4787999987602234},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.39239999651908875},{"id":"https://openalex.org/C3018395757","wikidata":"https://www.wikidata.org/wiki/Q1379672","display_name":"Evaluation methods","level":2,"score":0.3605000078678131},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3424000144004822},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.2962999939918518},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.29580000042915344},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2881999909877777},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.27880001068115234},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.2712000012397766},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.2648000121116638},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.26409998536109924},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.26260000467300415},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.25780001282691956},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2549999952316284}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn64981.2025.11228172","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn64981.2025.11228172","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":7,"referenced_works":["https://openalex.org/W2912924812","https://openalex.org/W2963748441","https://openalex.org/W3159959439","https://openalex.org/W4385572634","https://openalex.org/W4389523991","https://openalex.org/W4393160302","https://openalex.org/W4402671689"],"related_works":[],"abstract_inverted_index":{"Open-domain":[0],"Question":[1],"Answering":[2],"(ODQA)":[3],"has":[4],"long":[5],"been":[6],"an":[7],"NLP":[8],"task":[9],"receiving":[10],"wide":[11,36],"attention":[12],"of":[13,24,38,69,96,144],"researchers.":[14],"Despite":[15],"being":[16],"utilized":[17],"various":[18],"domains":[19],"and":[20,105,132,150],"applications,":[21],"the":[22,35,67,74,142],"evaluation":[23,56,99,130,134],"ODQA":[25,71,161],"systems":[26],"remains":[27],"a":[28,94],"complicated":[29],"problem,":[30],"which":[31,107,119],"is":[32],"worsened":[33],"by":[34,112],"usage":[37],"large":[39],"language":[40],"models(LLMs).":[41],"As":[42],"LLMs":[43],"often":[44],"generate":[45],"free-form":[46],"answers":[47],"that":[48,152],"do":[49],"not":[50,62],"follow":[51],"certain":[52],"format,":[53],"traditional":[54,129],"string-matching-driven":[55],"metrics":[57],"like":[58],"Lexical":[59],"Match":[60],"are":[61],"feasible":[63],"to":[64,122],"accurately":[65],"reflect":[66],"performance":[68],"LLM-based":[70],"systems.":[72],"In":[73],"meantime,":[75],"LLM-as-a-Judge":[76],"methods":[77,131,135,146],"with":[78,85,136],"simple":[79],"prompts":[80,100],"also":[81,140],"display":[82],"limited":[83],"consistency":[84],"human":[86,113],"annotators.":[87],"To":[88],"tackle":[89],"above":[90],"challenges,":[91],"we":[92,120],"propose":[93],"framework":[95],"developing":[97],"effective":[98],"based":[101],"on":[102,147],"iterative":[103],"test":[104],"optimization,":[106],"can":[108,154],"be":[109,155],"conducted":[110],"either":[111],"or":[114],"LLMs.":[115],"The":[116],"resulting":[117],"prompt,":[118],"refer":[121],"as":[123],"PromptMetric,":[124],"shows":[125],"considerable":[126],"advantages":[127],"over":[128],"LLM":[133],"basic":[137],"prompts.":[138],"We":[139],"demonstrate":[141],"robustness":[143],"our":[145],"different":[148],"models,":[149],"show":[151],"PromptMetric":[153],"highly":[156],"economical":[157],"when":[158],"applied":[159],"in":[160],"evaluation.":[162]},"counts_by_year":[],"updated_date":"2026-04-17T18:11:37.981687","created_date":"2025-11-14T00:00:00"}
