{"id":"https://openalex.org/W4412377846","doi":"https://doi.org/10.1145/3726302.3731969","title":"Data-efficient Meta-models for Evaluation of Context-based Questions and Answers in LLMs","display_name":"Data-efficient Meta-models for Evaluation of Context-based Questions and Answers in LLMs","publication_year":2025,"publication_date":"2025-07-13","ids":{"openalex":"https://openalex.org/W4412377846","doi":"https://doi.org/10.1145/3726302.3731969"},"language":"en","primary_location":{"id":"doi:10.1145/3726302.3731969","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3726302.3731969","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3726302.3731969","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3726302.3731969","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5118969347","display_name":"Julia Belikova","orcid":null},"institutions":[{"id":"https://openalex.org/I153845743","display_name":"Moscow Institute of Physics and Technology","ror":"https://ror.org/00v0z9322","country_code":"RU","type":"education","lineage":["https://openalex.org/I153845743"]}],"countries":["RU"],"is_corresponding":false,"raw_author_name":"Julia Belikova","raw_affiliation_strings":["Sber AI Lab, Moscow, Russian Federation and Moscow Institute of Physics and Technology, Dolgoprudny, Russian Federation"],"raw_orcid":"https://orcid.org/0009-0007-7829-1249","affiliations":[{"raw_affiliation_string":"Sber AI Lab, Moscow, Russian Federation and Moscow Institute of Physics and Technology, Dolgoprudny, Russian Federation","institution_ids":["https://openalex.org/I153845743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021254770","display_name":"Konstantin Polev","orcid":"https://orcid.org/0000-0002-0504-5940"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Konstantin Polev","raw_affiliation_strings":["Sber AI Lab, Moscow, Russian Federation"],"raw_orcid":"https://orcid.org/0000-0002-0504-5940","affiliations":[{"raw_affiliation_string":"Sber AI Lab, Moscow, Russian Federation","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5118969348","display_name":"Rauf Parchiev","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rauf Parchiev","raw_affiliation_strings":["Sber AI Lab, Moscow, Russian Federation"],"raw_orcid":"https://orcid.org/0009-0003-1678-1243","affiliations":[{"raw_affiliation_string":"Sber AI Lab, Moscow, Russian Federation","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5083442032","display_name":"D. Simakov","orcid":"https://orcid.org/0009-0003-3199-479X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dmitry Simakov","raw_affiliation_strings":["Sber AI Lab, Moscow, Russian Federation"],"raw_orcid":"https://orcid.org/0009-0003-3199-479X","affiliations":[{"raw_affiliation_string":"Sber AI Lab, Moscow, Russian Federation","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.07475518,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"4385","last_page":"4389"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9973999857902527,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9767000079154968,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10679","display_name":"Service-Oriented Architecture and Web Services","score":0.9751999974250793,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6432949304580688},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5441554188728333},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.4068794846534729},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.1217532753944397}],"concepts":[{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6432949304580688},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5441554188728333},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.4068794846534729},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.1217532753944397},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3726302.3731969","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3726302.3731969","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3726302.3731969","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3726302.3731969","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3726302.3731969","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3726302.3731969","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4412377846.pdf","grobid_xml":"https://content.openalex.org/works/W4412377846.grobid-xml"},"referenced_works_count":19,"referenced_works":["https://openalex.org/W3084095723","https://openalex.org/W3171920444","https://openalex.org/W3214646361","https://openalex.org/W4309674289","https://openalex.org/W4380353763","https://openalex.org/W4385573338","https://openalex.org/W4389524022","https://openalex.org/W4401042808","https://openalex.org/W4401043010","https://openalex.org/W4402670439","https://openalex.org/W4402671653","https://openalex.org/W4404351611","https://openalex.org/W4404783306","https://openalex.org/W4404792833","https://openalex.org/W4405968021","https://openalex.org/W4406170795","https://openalex.org/W4411113142","https://openalex.org/W4411630045","https://openalex.org/W6852874933"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"and":[4,38,84],"Retrieval-Augmented":[5],"Generation":[6],"(RAG)":[7],"systems":[8],"are":[9],"increasingly":[10],"deployed":[11],"in":[12,22,50,149],"industry":[13],"applications,":[14],"yet":[15],"their":[16,42],"reliability":[17],"remains":[18],"hampered":[19],"by":[20,62],"challenges":[21],"detecting":[23],"hallucinations.":[24],"While":[25],"supervised":[26],"state-of-the-art":[27],"(SOTA)":[28],"methods":[29],"that":[30,120],"leverage":[31],"LLM":[32],"hidden":[33],"states-such":[34],"as":[35],"activation":[36],"tracing":[37],"representation":[39],"analysis-show":[40],"promise,":[41],"dependence":[43],"on":[44,114],"extensively":[45],"annotated":[46],"datasets":[47],"limits":[48],"scalability":[49],"real-world":[51],"applications.":[52],"This":[53],"paper":[54],"addresses":[55],"the":[56,64,139],"critical":[57],"bottleneck":[58],"of":[59,66,141],"data":[60,69],"annotation":[61],"investigating":[63],"feasibility":[65],"reducing":[67],"training":[68,134],"requirements":[70],"for":[71,145],"two":[72],"SOTA":[73],"hallucination":[74],"detection":[75],"frameworks:":[76],"Lookback":[77],"Lens,":[78],"which":[79,87],"analyzes":[80],"attention":[81],"head":[82],"dynamics,":[83],"probing-based":[85],"approaches,":[86],"decode":[88],"internal":[89],"model":[90],"representations.":[91],"We":[92],"propose":[93],"a":[94],"methodology":[95],"combining":[96],"efficient":[97],"classification":[98],"algorithms":[99],"with":[100,131],"dimensionality":[101],"reduction":[102],"techniques":[103],"to":[104,126],"minimize":[105],"sample":[106],"size":[107],"demands":[108],"while":[109],"maintaining":[110],"competitive":[111],"performance.":[112],"Evaluations":[113],"standardized":[115],"question-answering":[116],"RAG":[117],"benchmarks":[118],"show":[119],"our":[121],"approach":[122],"achieves":[123],"performance":[124],"comparable":[125],"strong":[127],"proprietary":[128],"LLM-based":[129],"baselines":[130],"only":[132],"250":[133],"samples.":[135],"These":[136],"results":[137],"highlight":[138],"potential":[140],"lightweight,":[142],"data-efficient":[143],"paradigms":[144],"industrial":[146],"deployment,":[147],"particularly":[148],"annotation-constrained":[150],"scenarios.":[151]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
