{"id":"https://openalex.org/W7161730792","doi":"https://doi.org/10.48550/arxiv.2605.18752","title":"Traditional statistical representations outperform generative AI in identifying expert peer reviewers","display_name":"Traditional statistical representations outperform generative AI in identifying expert peer reviewers","publication_year":2026,"publication_date":"2026-05-18","ids":{"openalex":"https://openalex.org/W7161730792","doi":"https://doi.org/10.48550/arxiv.2605.18752"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.18752","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.18752","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.18752","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114637051","display_name":"Vicente Amado Olivo","orcid":"https://orcid.org/0000-0003-2248-0941"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Olivo, Vicente Amado","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136475111","display_name":"Tereza Jerabkova","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jerabkova, Tereza","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136501046","display_name":"Jakub Klencki","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Klencki, Jakub","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136485401","display_name":"John Carpenter","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Carpenter, John","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051043566","display_name":"Mario Mali\u010dki","orcid":"https://orcid.org/0000-0003-0698-1930"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mali\u010dki, Mario","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078274655","display_name":"F. Patat","orcid":"https://orcid.org/0000-0002-0537-3573"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Patat, Ferdinando","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5136490392","display_name":"Louis-Gregory Strolger","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Strolger, Louis-Gregory","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5136504786","display_name":"Wolfgang Kerzendorf","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kerzendorf, Wolfgang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13274","display_name":"Expert finding and Q&A systems","score":0.9316999912261963,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13274","display_name":"Expert finding and Q&A systems","score":0.9316999912261963,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.024700000882148743,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.00989999994635582,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.6370000243186951},{"id":"https://openalex.org/keywords/subject-matter-expert","display_name":"Subject-matter expert","score":0.5993000268936157},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.5194000005722046},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4643999934196472},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.45969998836517334},{"id":"https://openalex.org/keywords/statistical-model","display_name":"Statistical model","score":0.41780000925064087},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.3758000135421753},{"id":"https://openalex.org/keywords/framing","display_name":"Framing (construction)","score":0.37299999594688416}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7860999703407288},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.6370000243186951},{"id":"https://openalex.org/C105002631","wikidata":"https://www.wikidata.org/wiki/Q4833645","display_name":"Subject-matter expert","level":3,"score":0.5993000268936157},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.5194000005722046},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4650999903678894},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4643999934196472},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.45969998836517334},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.44130000472068787},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.41780000925064087},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.40459999442100525},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.38429999351501465},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.3758000135421753},{"id":"https://openalex.org/C169087156","wikidata":"https://www.wikidata.org/wiki/Q2131593","display_name":"Framing (construction)","level":2,"score":0.37299999594688416},{"id":"https://openalex.org/C72161134","wikidata":"https://www.wikidata.org/wiki/Q5421219","display_name":"Expert elicitation","level":2,"score":0.33880001306533813},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.33880001306533813},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3327000141143799},{"id":"https://openalex.org/C2777655017","wikidata":"https://www.wikidata.org/wiki/Q1501161","display_name":"Toolbox","level":2,"score":0.3321000039577484},{"id":"https://openalex.org/C58328972","wikidata":"https://www.wikidata.org/wiki/Q184609","display_name":"Expert system","level":2,"score":0.319599986076355},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.31470000743865967},{"id":"https://openalex.org/C3770464","wikidata":"https://www.wikidata.org/wiki/Q775963","display_name":"Smoothing","level":2,"score":0.30799999833106995},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.3027999997138977},{"id":"https://openalex.org/C2781083858","wikidata":"https://www.wikidata.org/wiki/Q17327049","display_name":"Scientific literature","level":2,"score":0.30239999294281006},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.2565000057220459}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.18752","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.18752","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.18752","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.18752","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.6275757551193237}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"exponential":[1],"growth":[2],"of":[3,18,28,54,72,100,154],"scientific":[4,208],"submissions":[5],"has":[6,23],"strained":[7],"the":[8,13,25,52,95,149,155,177],"peer":[9,97,191],"review":[10,98],"system.":[11],"Despite":[12],"rapidly":[14],"expanding":[15],"global":[16],"pool":[17],"researchers,":[19],"this":[20],"unprecedented":[21],"scale":[22],"rendered":[24],"previous":[26],"approach":[27],"manual":[29],"expert":[30,48,86,147],"identification":[31,77,87],"unfeasible.":[32],"Therefore,":[33],"institutions":[34],"have":[35],"naturally":[36],"turned":[37],"to":[38,43,79,158],"Large":[39],"Language":[40],"Models":[41],"(LLMs)":[42],"automate":[44],"intricate":[45],"processes":[46],"like":[47],"reviewer":[49],"identification.":[50],"However,":[51],"reliability":[53,82],"these":[55],"new":[56],"models":[57],"in":[58,180,206],"accurately":[59],"identifying":[60],"domain":[61,116],"experts":[62],"lacks":[63],"rigorous":[64,186],"evaluation.":[65],"We":[66],"conduct":[67],"a":[68,101,145,185],"comprehensive":[69],"empirical":[70],"evaluation":[71,187],"statistical":[73,133,199],"and":[74,83,125,197],"AI-driven":[75],"expertise":[76,169],"methodologies":[78,121],"benchmark":[80],"their":[81],"limitations.":[84],"Framing":[85],"as":[88,110],"an":[89],"information":[90],"retrieval":[91,120],"problem,":[92],"we":[93,129,193],"utilize":[94],"distributed":[96],"system":[99],"major":[102],"international":[103],"astronomical":[104],"observatory,":[105],"where":[106],"proposal":[107],"authorship":[108],"serves":[109],"our":[111],"proxy":[112],"ground":[113],"truth":[114],"for":[115,160,189],"expertise.":[117],"Evaluating":[118],"six":[119],"utilized":[122],"across":[123],"observatories":[124],"computer":[126],"science":[127],"conferences,":[128],"demonstrate":[130,194],"that":[131,166,195],"traditional":[132],"representations":[134,200],"outperform":[135,202],"generative":[136,181],"AI.":[137],"Specifically,":[138],"Term":[139],"Frequency-Inverse":[140],"Document":[141],"Frequency":[142],"successfully":[143],"identified":[144],"labeled":[146],"within":[148],"top":[150],"25":[151],"recommendations":[152],"79.5%":[153],"time,":[156],"compared":[157],"51.5%":[159],"GPT-4o":[161],"mini.":[162],"Our":[163],"results":[164],"highlight":[165],"distinguishing":[167],"subfield":[168],"requires":[170],"fine-grained":[171],"vocabulary,":[172],"which":[173],"is":[174],"obscured":[175],"by":[176],"semantic":[178],"smoothing":[179],"methods.":[182],"By":[183],"establishing":[184],"framework":[188],"automated":[190],"review,":[192],"transparent":[196],"reproducible":[198],"still":[201],"computationally":[203],"expensive":[204],"LLMs":[205],"specialized":[207],"tasks.":[209]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-05-20T00:00:00"}
