{"id":"https://openalex.org/W4416017518","doi":"https://doi.org/10.1145/3746252.3760934","title":"Open-Source LLM-based Relevance Assessment vs. Highly Reliable Manual Relevance Assessment: A Case Study","display_name":"Open-Source LLM-based Relevance Assessment vs. Highly Reliable Manual Relevance Assessment: A Case Study","publication_year":2025,"publication_date":"2025-11-08","ids":{"openalex":"https://openalex.org/W4416017518","doi":"https://doi.org/10.1145/3746252.3760934"},"language":null,"primary_location":{"id":"doi:10.1145/3746252.3760934","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746252.3760934","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 34th ACM International Conference on Information and Knowledge Management","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5023595778","display_name":"Tetsuya Sakai","orcid":"https://orcid.org/0000-0002-6720-963X"},"institutions":[{"id":"https://openalex.org/I150744194","display_name":"Waseda University","ror":"https://ror.org/00ntfnx83","country_code":"JP","type":"education","lineage":["https://openalex.org/I150744194"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Tetsuya Sakai","raw_affiliation_strings":["Waseda University/Naver Corporation, Tokyo, Japan"],"raw_orcid":"https://orcid.org/0000-0002-6720-963X","affiliations":[{"raw_affiliation_string":"Waseda University/Naver Corporation, Tokyo, Japan","institution_ids":["https://openalex.org/I150744194"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120301670","display_name":"Khant Myoe Rain","orcid":"https://orcid.org/0009-0004-2301-9537"},"institutions":[{"id":"https://openalex.org/I150744194","display_name":"Waseda University","ror":"https://ror.org/00ntfnx83","country_code":"JP","type":"education","lineage":["https://openalex.org/I150744194"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Khant Myoe Rain","raw_affiliation_strings":["Waseda University, Tokyo, Japan"],"raw_orcid":"https://orcid.org/0009-0004-2301-9537","affiliations":[{"raw_affiliation_string":"Waseda University, Tokyo, Japan","institution_ids":["https://openalex.org/I150744194"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092998360","display_name":"Rikiya Takehi","orcid":"https://orcid.org/0009-0003-6336-8064"},"institutions":[{"id":"https://openalex.org/I150744194","display_name":"Waseda University","ror":"https://ror.org/00ntfnx83","country_code":"JP","type":"education","lineage":["https://openalex.org/I150744194"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Rikiya Takehi","raw_affiliation_strings":["Waseda University, Tokyo, Japan"],"raw_orcid":"https://orcid.org/0009-0003-6336-8064","affiliations":[{"raw_affiliation_string":"Waseda University, Tokyo, Japan","institution_ids":["https://openalex.org/I150744194"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091105421","display_name":"Sijie Tao","orcid":"https://orcid.org/0000-0002-6751-5303"},"institutions":[{"id":"https://openalex.org/I150744194","display_name":"Waseda University","ror":"https://ror.org/00ntfnx83","country_code":"JP","type":"education","lineage":["https://openalex.org/I150744194"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Sijie Tao","raw_affiliation_strings":["Waseda University, Tokyo, Japan"],"raw_orcid":"https://orcid.org/0000-0002-6751-5303","affiliations":[{"raw_affiliation_string":"Waseda University, Tokyo, Japan","institution_ids":["https://openalex.org/I150744194"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5028145173","display_name":"Young-In Song","orcid":"https://orcid.org/0000-0003-0669-005X"},"institutions":[{"id":"https://openalex.org/I60922564","display_name":"Naver (South Korea)","ror":"https://ror.org/04nzrnx83","country_code":"KR","type":"company","lineage":["https://openalex.org/I60922564"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Young-In Song","raw_affiliation_strings":["Naver Corporation, Seoul, Republic of Korea"],"raw_orcid":"https://orcid.org/0000-0003-0669-005X","affiliations":[{"raw_affiliation_string":"Naver Corporation, Seoul, Republic of Korea","institution_ids":["https://openalex.org/I60922564"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5023595778"],"corresponding_institution_ids":["https://openalex.org/I150744194"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.45536831,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"5186","last_page":"5190"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.9465000033378601,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.9465000033378601,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13274","display_name":"Expert finding and Q&A systems","score":0.018300000578165054,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13976","display_name":"Web visibility and informetrics","score":0.0038999998942017555,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.7864000201225281},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.7552000284194946},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.7195000052452087},{"id":"https://openalex.org/keywords/complement","display_name":"Complement (music)","score":0.5058000087738037},{"id":"https://openalex.org/keywords/learning-to-rank","display_name":"Learning to rank","score":0.46630001068115234}],"concepts":[{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.7864000201225281},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.7552000284194946},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.7195000052452087},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6504999995231628},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.527400016784668},{"id":"https://openalex.org/C112313634","wikidata":"https://www.wikidata.org/wiki/Q7886648","display_name":"Complement (music)","level":5,"score":0.5058000087738037},{"id":"https://openalex.org/C86037889","wikidata":"https://www.wikidata.org/wiki/Q4330127","display_name":"Learning to rank","level":3,"score":0.46630001068115234},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43970000743865967},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.42410001158714294},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.4018000066280365},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2892000079154968},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.27070000767707825},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.2689000070095062},{"id":"https://openalex.org/C12174686","wikidata":"https://www.wikidata.org/wiki/Q1058438","display_name":"Risk assessment","level":2,"score":0.26460000872612},{"id":"https://openalex.org/C97854310","wikidata":"https://www.wikidata.org/wiki/Q19541","display_name":"Search engine","level":2,"score":0.2549999952316284}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3746252.3760934","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746252.3760934","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 34th ACM International Conference on Information and Knowledge Management","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W2017292914","https://openalex.org/W2058896506","https://openalex.org/W2068098598","https://openalex.org/W2137274315","https://openalex.org/W2967489092","https://openalex.org/W4385688511","https://openalex.org/W4400526908","https://openalex.org/W4405143965","https://openalex.org/W4408811965","https://openalex.org/W4412377897","https://openalex.org/W4412377949"],"related_works":[],"abstract_inverted_index":{"There":[0],"is":[1],"currently":[2],"a":[3,47],"controversy":[4],"as":[5],"to":[6,78],"whether":[7],"LLM-based":[8,90,111,128],"relevance":[9,14,66],"assessment":[10,15,91,99,112,124,129],"can":[11,125],"replace":[12,97],"manual":[13,123],"for":[16,46,101],"evaluating":[17],"search":[18,58],"engines":[19],"accurately":[20],"at":[21,23,36],"least":[22],"the":[24,37,69,83],"run":[25],"level":[26,40],"(e.g.,":[27,41],"ranking":[28,102],"TREC":[29],"runs":[30],"by":[31],"mean":[32,107],"nDCG)":[33],"if":[34],"not":[35],"individual":[38],"topic":[39],"computing":[42],"an":[43,55],"nDCG":[44],"score":[45],"Search":[48],"Engine":[49],"Result":[50],"Page).":[51],"This":[52],"study":[53],"utilises":[54],"NTCIR":[56],"web":[57],"test":[59],"collection":[60],"that":[61,89,122],"features":[62],"highly":[63],"reliable":[64],"human":[65,98],"labels":[67],"(reflecting":[68],"collective":[70],"view":[71],"of":[72,106,137],"eight":[73],"independent":[74],"assessors":[75],"per":[76],"topic)":[77],"complement":[79],"prior":[80],"findings":[81],"from":[82],"skeptic":[84],"camp.":[85],"Our":[86],"experiments":[87],"show":[88],"(using":[92],"Llama":[93],"and":[94],"Qwen)":[95],"cannot":[96],"even":[100],"systems":[103],"in":[104,135],"terms":[105,136],"nDCG.":[108],"More":[109],"importantly,":[110],"lacks":[113],"discriminative":[114],"power:":[115],"it":[116],"misses":[117],"many":[118],"statistically":[119],"significant":[120],"differences":[121],"detect.":[126],"Furthermore,":[127],"occasionally":[130],"yields":[131],"potential":[132],"false":[133],"alarms":[134],"statistical":[138],"significance,":[139],"which":[140],"may":[141],"let":[142],"researchers":[143],"reach":[144],"incorrect":[145],"conclusions.":[146]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-11-08T00:00:00"}
