{"id":"https://openalex.org/W7130589917","doi":"https://doi.org/10.1109/fllm67465.2025.11391148","title":"Collective Reasoning Among LLMs: A Framework for Answer Validation Without Ground Truth","display_name":"Collective Reasoning Among LLMs: A Framework for Answer Validation Without Ground Truth","publication_year":2025,"publication_date":"2025-11-25","ids":{"openalex":"https://openalex.org/W7130589917","doi":"https://doi.org/10.1109/fllm67465.2025.11391148"},"language":null,"primary_location":{"id":"doi:10.1109/fllm67465.2025.11391148","is_oa":false,"landing_page_url":"https://doi.org/10.1109/fllm67465.2025.11391148","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 3rd International Conference on Foundation and Large Language Models (FLLM)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114988003","display_name":"Seyed Pouyan Mousavi Davoudi","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113791","display_name":"Chemistry and Chemical Engineering Research Center of Iran","ror":"https://ror.org/020sjp894","country_code":"IR","type":"facility","lineage":["https://openalex.org/I4210113791"]}],"countries":["IR"],"is_corresponding":true,"raw_author_name":"Seyed Pouyan Mousavi Davoudi","raw_affiliation_strings":["AI and Statistics,Tehran,Iran"],"affiliations":[{"raw_affiliation_string":"AI and Statistics,Tehran,Iran","institution_ids":["https://openalex.org/I4210113791"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034791306","display_name":"A. G. Davodi","orcid":"https://orcid.org/0000-0003-2015-6064"},"institutions":[{"id":"https://openalex.org/I176861719","display_name":"University of Shahrood","ror":"https://ror.org/00yqvtm78","country_code":"IR","type":"education","lineage":["https://openalex.org/I176861719"]}],"countries":["IR"],"is_corresponding":false,"raw_author_name":"Amin Gholami Davodi","raw_affiliation_strings":["Shahrood University of Technology,Tehran,Iran"],"affiliations":[{"raw_affiliation_string":"Shahrood University of Technology,Tehran,Iran","institution_ids":["https://openalex.org/I176861719"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126408049","display_name":"Alireza Amiri Margavi","orcid":null},"institutions":[{"id":"https://openalex.org/I4210125693","display_name":"Simulation Technologies (United States)","ror":"https://ror.org/02ypd5h61","country_code":"US","type":"company","lineage":["https://openalex.org/I4210125693"]},{"id":"https://openalex.org/I4210151178","display_name":"System Simulation (United Kingdom)","ror":"https://ror.org/04gf6zh40","country_code":"GB","type":"company","lineage":["https://openalex.org/I4210151178"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Alireza Amiri Margavi","raw_affiliation_strings":["University of Pittsburgh,Computational Modeling and Simulation,USA"],"affiliations":[{"raw_affiliation_string":"University of Pittsburgh,Computational Modeling and Simulation,USA","institution_ids":["https://openalex.org/I4210125693","https://openalex.org/I4210151178"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5126381473","display_name":"Mahdi Jafari","orcid":null},"institutions":[{"id":"https://openalex.org/I165102784","display_name":"Duquesne University","ror":"https://ror.org/02336z538","country_code":"US","type":"education","lineage":["https://openalex.org/I165102784"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mahdi Jafari","raw_affiliation_strings":["Duquesne University,Analytics and Information Science,USA"],"affiliations":[{"raw_affiliation_string":"Duquesne University,Analytics and Information Science,USA","institution_ids":["https://openalex.org/I165102784"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5114988003"],"corresponding_institution_ids":["https://openalex.org/I4210113791"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.88496174,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"343","last_page":"349"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.8776000142097473,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.8776000142097473,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13274","display_name":"Expert finding and Q&A systems","score":0.030300000682473183,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11902","display_name":"Intelligent Tutoring Systems and Adaptive Learning","score":0.029500000178813934,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.557699978351593},{"id":"https://openalex.org/keywords/coherence","display_name":"Coherence (philosophical gambling strategy)","score":0.5573999881744385},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.521399974822998},{"id":"https://openalex.org/keywords/ground-truth","display_name":"Ground truth","score":0.5160999894142151},{"id":"https://openalex.org/keywords/dependability","display_name":"Dependability","score":0.4077000021934509},{"id":"https://openalex.org/keywords/test","display_name":"Test (biology)","score":0.3756999969482422},{"id":"https://openalex.org/keywords/uncertainty-quantification","display_name":"Uncertainty quantification","score":0.3334999978542328},{"id":"https://openalex.org/keywords/empirical-research","display_name":"Empirical research","score":0.3138999938964844}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5630000233650208},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.557699978351593},{"id":"https://openalex.org/C2781181686","wikidata":"https://www.wikidata.org/wiki/Q4226068","display_name":"Coherence (philosophical gambling strategy)","level":2,"score":0.5573999881744385},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.521399974822998},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.5160999894142151},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4828999936580658},{"id":"https://openalex.org/C77019957","wikidata":"https://www.wikidata.org/wiki/Q2689057","display_name":"Dependability","level":2,"score":0.4077000021934509},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.3756999969482422},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.37299999594688416},{"id":"https://openalex.org/C32230216","wikidata":"https://www.wikidata.org/wiki/Q7882499","display_name":"Uncertainty quantification","level":2,"score":0.3334999978542328},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.3138999938964844},{"id":"https://openalex.org/C87007009","wikidata":"https://www.wikidata.org/wiki/Q210832","display_name":"Statistical hypothesis testing","level":2,"score":0.31279999017715454},{"id":"https://openalex.org/C44249647","wikidata":"https://www.wikidata.org/wiki/Q208498","display_name":"Confidence interval","level":2,"score":0.3052999973297119},{"id":"https://openalex.org/C148220186","wikidata":"https://www.wikidata.org/wiki/Q7111912","display_name":"Outcome (game theory)","level":2,"score":0.3046000003814697},{"id":"https://openalex.org/C2777877512","wikidata":"https://www.wikidata.org/wiki/Q1116097","display_name":"Common ground","level":2,"score":0.29409998655319214},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.29190000891685486},{"id":"https://openalex.org/C137209882","wikidata":"https://www.wikidata.org/wiki/Q1403517","display_name":"Measurement uncertainty","level":2,"score":0.29019999504089355},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.28119999170303345},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.27230000495910645},{"id":"https://openalex.org/C165838908","wikidata":"https://www.wikidata.org/wiki/Q736777","display_name":"Calibration","level":2,"score":0.2646999955177307},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.2547999918460846},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.2533999979496002}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/fllm67465.2025.11391148","is_oa":false,"landing_page_url":"https://doi.org/10.1109/fllm67465.2025.11391148","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 3rd International Conference on Foundation and Large Language Models (FLLM)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W1534477342","https://openalex.org/W1967003847","https://openalex.org/W2062537151","https://openalex.org/W2120220258","https://openalex.org/W2160643434","https://openalex.org/W2328472937","https://openalex.org/W2771605021","https://openalex.org/W2945976633","https://openalex.org/W2985543011","https://openalex.org/W3030030520","https://openalex.org/W3104119469","https://openalex.org/W3133702157","https://openalex.org/W3165389214","https://openalex.org/W3212368439","https://openalex.org/W4236544382","https://openalex.org/W4288083801","https://openalex.org/W4288359825","https://openalex.org/W4386870427","https://openalex.org/W4389524566","https://openalex.org/W4401488582","https://openalex.org/W4411119494","https://openalex.org/W4411531987","https://openalex.org/W4411638692","https://openalex.org/W4414281281","https://openalex.org/W4415795422"],"related_works":[],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"a":[3,127],"novel":[4],"framework":[5],"in":[6,105,134],"which":[7],"multiple":[8],"advanced":[9],"large":[10],"language":[11],"models\u2014GPT-4-0125-preview,":[12],"Meta-LlaMA-3-70B-Instruct,":[13],"Claude-3-Opus,":[14],"and":[15,18,48,57,66,74,81,99,122],"Gemini-1.5-Flash\u2014jointly":[16],"generate":[17],"answer":[19,120],"complex,":[20],"doctoral-level":[21],"probability":[22],"problems":[23],"without":[24],"relying":[25],"on":[26],"predefined":[27],"ground":[28],"truth.":[29],"Instead":[30],"of":[31,45],"measuring":[32],"accuracy":[33],"against":[34],"fixed":[35],"answers,":[36],"we":[37],"interpret":[38],"inter-model":[39],"agreement":[40],"as":[41],"an":[42],"empirical":[43],"indicator":[44],"response":[46],"reliability":[47],"question":[49,123],"quality.":[50],"Statistical":[51],"analyses\u2014including":[52],"chisquare":[53],"tests,":[54],"Fleiss\u2019":[55],"\u03ba,":[56],"bootstrap":[58],"confidence":[59,83],"intervals\u2014are":[60],"used":[61],"to":[62,86,131],"quantify":[63],"alignment,":[64],"variability,":[65],"stability":[67],"across":[68],"models.":[69,91],"Results":[70],"show":[71],"that":[72,111],"Claude":[73],"Gemini":[75],"produce":[76],"questions":[77],"with":[78],"higher":[79],"coherence":[80],"narrower":[82],"intervals,":[84],"leading":[85],"stronger":[87],"consensus":[88],"among":[89,114],"answering":[90],"In":[92],"contrast,":[93],"LLaMA":[94],"exhibits":[95],"broader":[96],"uncertainty":[97],"ranges":[98],"lower":[100],"agreement,":[101],"reflecting":[102],"greater":[103],"inconsistency":[104],"its":[106],"formulations.":[107],"These":[108],"findings":[109],"demonstrate":[110],"collaborative":[112],"reasoning":[113,137],"heterogeneous":[115],"LLMs":[116],"can":[117],"enhance":[118],"both":[119],"dependability":[121],"design":[124],"evaluation,":[125],"offering":[126],"scalable,":[128],"data-driven":[129],"approach":[130],"truth-free":[132],"validation":[133],"multi-model":[135],"AI":[136],"systems.":[138]},"counts_by_year":[],"updated_date":"2026-02-20T17:44:18.066148","created_date":"2026-02-20T00:00:00"}
