{"id":"https://openalex.org/W4417452344","doi":"https://doi.org/10.48550/arxiv.2512.12868","title":"Counting Clues: A Lightweight Probabilistic Baseline Can Match an LLM","display_name":"Counting Clues: A Lightweight Probabilistic Baseline Can Match an LLM","publication_year":2025,"publication_date":"2025-12-14","ids":{"openalex":"https://openalex.org/W4417452344","doi":"https://doi.org/10.48550/arxiv.2512.12868"},"language":null,"primary_location":{"id":"pmh:oai:arXiv.org:2512.12868","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2512.12868","pdf_url":"https://arxiv.org/pdf/2512.12868","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2512.12868","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5120841712","display_name":"Furong Jia","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Jia, Furong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101848345","display_name":"Yuan Pu","orcid":"https://orcid.org/0000-0002-1322-5642"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pu, Yuan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101556960","display_name":"Feng Guo","orcid":"https://orcid.org/0000-0001-8131-9301"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Finn","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5088961661","display_name":"Monica Agrawal","orcid":"https://orcid.org/0000-0002-6515-329X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Agrawal, Monica","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5120841712"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.5830000042915344,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.5830000042915344,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.1599999964237213,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.06889999657869339,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.8324999809265137},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.72079998254776},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6729000210762024},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6008999943733215},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.5993000268936157},{"id":"https://openalex.org/keywords/point-estimation","display_name":"Point estimation","score":0.580299973487854},{"id":"https://openalex.org/keywords/point","display_name":"Point (geometry)","score":0.5116999745368958},{"id":"https://openalex.org/keywords/bayes-theorem","display_name":"Bayes' theorem","score":0.4959999918937683},{"id":"https://openalex.org/keywords/probabilistic-classification","display_name":"Probabilistic classification","score":0.44020000100135803}],"concepts":[{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.8324999809265137},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.72079998254776},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6977999806404114},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6729000210762024},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6240000128746033},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.604200005531311},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6008999943733215},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.5993000268936157},{"id":"https://openalex.org/C41426520","wikidata":"https://www.wikidata.org/wiki/Q1192065","display_name":"Point estimation","level":2,"score":0.580299973487854},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.5116999745368958},{"id":"https://openalex.org/C207201462","wikidata":"https://www.wikidata.org/wiki/Q182505","display_name":"Bayes' theorem","level":3,"score":0.4959999918937683},{"id":"https://openalex.org/C189119545","wikidata":"https://www.wikidata.org/wiki/Q5128022","display_name":"Probabilistic classification","level":4,"score":0.44020000100135803},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.3984000086784363},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.3702000081539154},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.36959999799728394},{"id":"https://openalex.org/C52001869","wikidata":"https://www.wikidata.org/wiki/Q812530","display_name":"Naive Bayes classifier","level":3,"score":0.36149999499320984},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.36149999499320984},{"id":"https://openalex.org/C134261354","wikidata":"https://www.wikidata.org/wiki/Q938438","display_name":"Statistical inference","level":2,"score":0.3479999899864197},{"id":"https://openalex.org/C107673813","wikidata":"https://www.wikidata.org/wiki/Q812534","display_name":"Bayesian probability","level":2,"score":0.3330000042915344},{"id":"https://openalex.org/C160234255","wikidata":"https://www.wikidata.org/wiki/Q812535","display_name":"Bayesian inference","level":3,"score":0.3287000060081482},{"id":"https://openalex.org/C60782215","wikidata":"https://www.wikidata.org/wiki/Q3333679","display_name":"Probabilistic method","level":3,"score":0.32330000400543213},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.31439998745918274},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.29899999499320984},{"id":"https://openalex.org/C87007009","wikidata":"https://www.wikidata.org/wiki/Q210832","display_name":"Statistical hypothesis testing","level":2,"score":0.2879999876022339},{"id":"https://openalex.org/C196083921","wikidata":"https://www.wikidata.org/wiki/Q7915758","display_name":"Variance (accounting)","level":2,"score":0.28519999980926514},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.2678000032901764},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.258899986743927},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.25450000166893005}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2512.12868","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2512.12868","pdf_url":"https://arxiv.org/pdf/2512.12868","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2512.12868","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.12868","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2512.12868","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2512.12868","pdf_url":"https://arxiv.org/pdf/2512.12868","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1],"models":[2],"(LLMs)":[3],"excel":[4],"on":[5,88],"multiple-choice":[6],"clinical":[7],"diagnosis":[8],"benchmarks,":[9],"yet":[10],"it":[11],"is":[12,33],"unclear":[13],"how":[14],"much":[15],"of":[16,113,122,143,176],"this":[17,25],"performance":[18,82,130,142],"reflects":[19],"underlying":[20],"probabilistic":[21,124],"reasoning.":[22],"We":[23,40],"study":[24],"through":[26],"questions":[27,100],"from":[28,62,71],"MedQA,":[29],"where":[30],"the":[31,36,42,72,84,119,141,164],"task":[32],"to":[34,83,146,163],"select":[35],"most":[37],"likely":[38],"diagnosis.":[39],"introduce":[41],"Frequency-Based":[43],"Probabilistic":[44],"Ranker":[45],"(FBPR),":[46],"a":[47,54,63,128,134,150,173],"lightweight":[48],"method":[49],"that":[50,89,159],"scores":[51],"options":[52],"with":[53,102],"smoothed":[55],"Naive":[56],"Bayes":[57],"over":[58],"concept-diagnosis":[59],"co-occurrence":[60,67],"statistics":[61,68],"large":[64],"corpus.":[65,91],"When":[66],"were":[69],"sourced":[70],"pretraining":[73],"corpora":[74],"for":[75,137,172],"OLMo":[76],"and":[77,95,133],"Llama,":[78],"FBPR":[79,96],"achieves":[80],"comparable":[81],"corresponding":[85],"LLMs":[86,144],"pretrained":[87],"same":[90],"Direct":[92],"LLM":[93],"inference":[94],"largely":[97],"get":[98],"different":[99],"correct,":[101],"an":[103,160],"overlap":[104],"only":[105],"slightly":[106],"above":[107],"random":[108],"chance,":[109],"indicating":[110],"complementary":[111,135],"strengths":[112],"each":[114],"method.":[115],"These":[116],"findings":[117],"highlight":[118],"continued":[120],"value":[121],"explicit":[123],"baselines:":[125],"they":[126],"provide":[127],"meaningful":[129],"reference":[131],"point":[132],"signal":[136],"potential":[138],"hybridization.":[139],"While":[140],"seems":[145],"be":[147],"driven":[148],"by":[149],"mechanism":[151],"other":[152],"than":[153],"simple":[154],"frequency":[155],"aggregation,":[156],"we":[157],"show":[158],"approach":[161],"similar":[162],"historically":[165],"grounded,":[166],"low-complexity":[167],"expert":[168],"systems":[169],"still":[170],"accounts":[171],"substantial":[174],"portion":[175],"benchmark":[177],"performance.":[178]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-12-17T00:00:00"}
