{"id":"https://openalex.org/W4402353454","doi":"https://doi.org/10.1109/ijcnn60899.2024.10650668","title":"Exploring and Improving Consistency in Large Language Models for Multiple-Choice Question Assessment","display_name":"Exploring and Improving Consistency in Large Language Models for Multiple-Choice Question Assessment","publication_year":2024,"publication_date":"2024-06-30","ids":{"openalex":"https://openalex.org/W4402353454","doi":"https://doi.org/10.1109/ijcnn60899.2024.10650668"},"language":"en","primary_location":{"id":"doi:10.1109/ijcnn60899.2024.10650668","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn60899.2024.10650668","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101918392","display_name":"Wenjie Zhou","orcid":"https://orcid.org/0000-0002-8385-7194"},"institutions":[{"id":"https://openalex.org/I3923682","display_name":"Soochow University","ror":"https://ror.org/05t8y2r12","country_code":"CN","type":"education","lineage":["https://openalex.org/I3923682"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wenjie Zhou","raw_affiliation_strings":["Soochow University,School of Computer Science and Technology,Soochow,China"],"affiliations":[{"raw_affiliation_string":"Soochow University,School of Computer Science and Technology,Soochow,China","institution_ids":["https://openalex.org/I3923682"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5089990864","display_name":"Xiangyu Duan","orcid":"https://orcid.org/0000-0002-7268-8823"},"institutions":[{"id":"https://openalex.org/I3923682","display_name":"Soochow University","ror":"https://ror.org/05t8y2r12","country_code":"CN","type":"education","lineage":["https://openalex.org/I3923682"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiangyu Duan","raw_affiliation_strings":["Soochow University,School of Computer Science and Technology,Soochow,China"],"affiliations":[{"raw_affiliation_string":"Soochow University,School of Computer Science and Technology,Soochow,China","institution_ids":["https://openalex.org/I3923682"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5101918392"],"corresponding_institution_ids":["https://openalex.org/I3923682"],"apc_list":null,"apc_paid":null,"fwci":0.3626,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.66481918,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"9"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9905999898910522,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9905999898910522,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13274","display_name":"Expert finding and Q&A systems","score":0.9882000088691711,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9840999841690063,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7197498083114624},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.6985618472099304},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3283535838127136},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.29208609461784363}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7197498083114624},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.6985618472099304},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3283535838127136},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.29208609461784363}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ijcnn60899.2024.10650668","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ijcnn60899.2024.10650668","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 International Joint Conference on Neural Networks (IJCNN)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.6700000166893005,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W3185341429","https://openalex.org/W3201174429","https://openalex.org/W4206825193","https://openalex.org/W4285210452","https://openalex.org/W4385571157","https://openalex.org/W4389520124","https://openalex.org/W4391136507","https://openalex.org/W4401042689"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W2382290278","https://openalex.org/W4395014643"],"abstract_inverted_index":{"With":[0],"the":[1,43,63,95,100,104,135,140,143,163,176,193,202],"evolution":[2],"of":[3,66,97,106,195,204],"Large":[4],"Language":[5],"Models":[6],"(LLMs),":[7],"accurately":[8],"evaluating":[9],"their":[10,51,197],"capabilities":[11],"has":[12],"become":[13],"a":[14],"critical":[15],"focus.":[16],"The":[17],"Multi-choice":[18],"Questions":[19],"(MCQ)":[20],"benchmark":[21],"is":[22,40,182],"widely":[23],"adopted":[24],"for":[25],"its":[26,107],"definitive":[27],"answers":[28],"and":[29,50,61,83,93,155,186,199],"straightforward":[30],"assessment":[31],"approach.":[32],"However,":[33],"recent":[34],"studies":[35],"have":[36],"found":[37],"that":[38],"there":[39],"inconsistency":[41],"in":[42,55,68,99,190,201],"model,":[44,101],"raising":[45],"concerns":[46],"about":[47],"potential":[48],"biases":[49],"genuine":[52],"comprehension":[53],"abilities":[54,146],"MCQ":[56,69,205],"contexts.":[57],"To":[58],"delve":[59],"into":[60],"improve":[62],"consistency":[64,77,82,98,188],"performance":[65],"LLMs":[67],"answering":[70],"domain,":[71],"this":[72],"study":[73],"first":[74],"proposes":[75],"new":[76],"metrics,":[78,113],"including":[79],"option":[80,84],"position":[81],"symbol":[85],"consistency.":[86],"These":[87],"metrics":[88],"are":[89],"designed":[90],"to":[91,147,174,184],"quantify":[92],"reveal":[94],"level":[96],"thereby":[102],"assessing":[103],"authenticity":[105],"knowledge":[108],"comprehension.":[109],"Secondly,":[110],"utilizing":[111],"these":[112],"we":[114],"propose":[115],"two":[116],"novel":[117],"improvement":[118],"strategies:":[119],"1)":[120],"An":[121],"enhanced":[122],"In-context":[123],"Learning":[124],"(ICL)":[125],"prompt":[126],"customization":[127],"technique,":[128],"which":[129,161],"adaptively":[130],"modifies":[131],"prompts":[132,141],"based":[133],"on":[134,168],"model\u2019s":[136,144,177],"demonstrated":[137],"capabilities,":[138],"aligning":[139],"with":[142,192],"inherent":[145,178],"filter":[148],"out":[149],"questions":[150],"it":[151],"deems":[152],"consistently":[153],"answerable;":[154],"2)":[156],"Consistency":[157],"Supervised":[158],"Fine-Tuning":[159],"(CSFT),":[160],"enriches":[162],"training":[164],"data":[165],"set":[166],"focused":[167],"consistency,":[169],"followed":[170],"by":[171],"specialized":[172],"fine-tuning":[173],"augment":[175],"capabilities.":[179],"Our":[180],"research":[181],"committed":[183],"exploring":[185],"improving":[187],"levels":[189],"models,":[191],"goal":[194],"bolstering":[196],"integrity":[198],"reliability":[200],"realm":[203],"answering.":[206]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
