{"id":"https://openalex.org/W7154336586","doi":"https://doi.org/10.48550/arxiv.2604.11092","title":"ARHN: Answer-Centric Relabeling of Hard Negatives with Open-Source LLMs for Dense Retrieval","display_name":"ARHN: Answer-Centric Relabeling of Hard Negatives with Open-Source LLMs for Dense Retrieval","publication_year":2026,"publication_date":"2026-04-13","ids":{"openalex":"https://openalex.org/W7154336586","doi":"https://doi.org/10.48550/arxiv.2604.11092"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.11092","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.11092","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.11092","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133605589","display_name":"Hyewon Choi","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Choi, Hyewon","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130119352","display_name":"Jooyoung Choi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Choi, Jooyoung","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133570422","display_name":"Hansol Jang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jang, Hansol","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070361261","display_name":"Hyun Chul Kim","orcid":"https://orcid.org/0000-0003-4634-4522"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Hyun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081234662","display_name":"Chulmin Yun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yun, Chulmin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089943823","display_name":"Changwook Jun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jun, ChangWook","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133560009","display_name":"Stanley Jungkyu Choi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Choi, Stanley Jungkyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5133605589"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.6417999863624573,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.6417999863624573,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.09989999979734421,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.07370000332593918,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.6837999820709229},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.6638000011444092},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.6266999840736389},{"id":"https://openalex.org/keywords/false-positives-and-false-negatives","display_name":"False positives and false negatives","score":0.6247000098228455},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6205000281333923},{"id":"https://openalex.org/keywords/snippet","display_name":"Snippet","score":0.5414000153541565},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4553999900817871},{"id":"https://openalex.org/keywords/false-positive-paradox","display_name":"False positive paradox","score":0.44830000400543213},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.4375}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7281000018119812},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.6837999820709229},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.6638000011444092},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.6266999840736389},{"id":"https://openalex.org/C112789634","wikidata":"https://www.wikidata.org/wiki/Q18207010","display_name":"False positives and false negatives","level":3,"score":0.6247000098228455},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6205000281333923},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5655999779701233},{"id":"https://openalex.org/C2777822670","wikidata":"https://www.wikidata.org/wiki/Q1120538","display_name":"Snippet","level":2,"score":0.5414000153541565},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45660001039505005},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4553999900817871},{"id":"https://openalex.org/C64869954","wikidata":"https://www.wikidata.org/wiki/Q1859747","display_name":"False positive paradox","level":2,"score":0.44830000400543213},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.4375},{"id":"https://openalex.org/C86037889","wikidata":"https://www.wikidata.org/wiki/Q4330127","display_name":"Learning to rank","level":3,"score":0.4169999957084656},{"id":"https://openalex.org/C207062185","wikidata":"https://www.wikidata.org/wiki/Q902085","display_name":"Homonym (biology)","level":3,"score":0.39879998564720154},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.392300009727478},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.3765999972820282},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.3456000089645386},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.33959999680519104},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3391000032424927},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.336899995803833},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.329800009727478},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.3285999894142151},{"id":"https://openalex.org/C182306322","wikidata":"https://www.wikidata.org/wiki/Q1779371","display_name":"Order (exchange)","level":2,"score":0.31439998745918274},{"id":"https://openalex.org/C130440534","wikidata":"https://www.wikidata.org/wiki/Q14946528","display_name":"Conflation","level":2,"score":0.2904999852180481},{"id":"https://openalex.org/C2777080924","wikidata":"https://www.wikidata.org/wiki/Q334667","display_name":"Storyboard","level":2,"score":0.2833999991416931},{"id":"https://openalex.org/C124975894","wikidata":"https://www.wikidata.org/wiki/Q7293290","display_name":"Ranking SVM","level":3,"score":0.26409998536109924},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.2578999996185303},{"id":"https://openalex.org/C100675267","wikidata":"https://www.wikidata.org/wiki/Q1371624","display_name":"Background noise","level":2,"score":0.2567000091075897},{"id":"https://openalex.org/C2778956030","wikidata":"https://www.wikidata.org/wiki/Q5142477","display_name":"Cold start (automotive)","level":2,"score":0.2540999948978424},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2522999942302704}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.11092","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.11092","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.11092","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.11092","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Neural":[0],"retrievers":[1],"are":[2,36,138],"often":[3],"trained":[4],"on":[5,168,215],"large-scale":[6,228],"triplet":[7],"data":[8],"comprising":[9],"a":[10,12,16,64,94,220],"query,":[11],"positive":[13,137],"passage,":[14],"and":[15,29,51,179,200,222],"set":[17,122,160],"of":[18,61],"hard":[19,73],"negatives.":[20],"In":[21,80,109],"practice,":[22],"hard-negative":[23],"mining":[24],"can":[25,52],"introduce":[26],"false":[27,198],"negatives":[28,199,203],"other":[30],"ambiguous":[31,163,202],"negatives,":[32],"including":[33],"passages":[34,125,144],"that":[35,67,101,152,195],"relevant":[37],"or":[38,98],"contain":[39,153],"partial":[40],"answers":[41],"to":[42,71,92,99,123,129,140,161],"the":[43,81,90,102,110,120,130,135,147,158,169,184],"query.":[44,131],"Such":[45],"label":[46],"noise":[47],"yields":[48,204],"inconsistent":[49],"supervision":[50,206],"degrade":[53],"retrieval":[54,210],"effectiveness.":[55],"We":[56,165],"propose":[57],"ARHN":[58,88,113,149,167,218],"(Answer-centric":[59],"Relabeling":[60],"Hard":[62],"Negatives),":[63],"two-stage":[65],"framework":[66],"leverages":[68],"open-source":[69,216],"LLMs":[70],"refine":[72],"negative":[74,159],"samples":[75],"using":[76],"answer-centric":[77],"relevance":[78],"signals.":[79],"first":[82],"stage,":[83,112],"for":[84,207,227],"each":[85],"query-passage":[86],"pair,":[87],"prompts":[89],"LLM":[91],"generate":[93],"passage-grounded":[95],"answer":[96,155],"snippet":[97,156],"indicate":[100],"passage":[103],"does":[104],"not":[105],"support":[106],"an":[107,115,154],"answer.":[108],"second":[111],"applies":[114],"LLM-based":[116],"listwise":[117],"ranking":[118],"over":[119,189],"candidate":[121],"order":[124],"by":[126],"direct":[127],"answerability":[128],"Passages":[132],"ranked":[133,145],"above":[134],"original":[136],"relabeled":[139],"additional":[141],"positives.":[142],"Among":[143],"below":[146],"positive,":[148],"excludes":[150],"any":[151],"from":[157],"avoid":[162],"supervision.":[164],"evaluated":[166],"BEIR":[170],"benchmark":[171],"under":[172],"three":[173],"configurations:":[174],"relabeling":[175,197],"only,":[176,178],"filtering":[177,201],"their":[180],"combination.":[181],"Across":[182],"datasets,":[183],"combined":[185],"strategy":[186],"consistently":[187],"improves":[188],"either":[190],"step":[191],"in":[192],"isolation,":[193],"indicating":[194],"jointly":[196],"cleaner":[205],"training":[208],"neural":[209],"models.":[211],"By":[212],"relying":[213],"strictly":[214],"models,":[217],"establishes":[219],"cost-effective":[221],"scalable":[223],"refinement":[224],"pipeline":[225],"suitable":[226],"training.":[229]},"counts_by_year":[],"updated_date":"2026-04-29T09:16:38.111599","created_date":"2026-04-15T00:00:00"}
