{"id":"https://openalex.org/W7139913897","doi":"https://doi.org/10.48550/arxiv.2603.18005","title":"Negative Sampling Techniques in Information Retrieval: A Survey","display_name":"Negative Sampling Techniques in Information Retrieval: A Survey","publication_year":2026,"publication_date":"2026-01-09","ids":{"openalex":"https://openalex.org/W7139913897","doi":"https://doi.org/10.48550/arxiv.2603.18005"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.18005","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.18005","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.18005","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130236318","display_name":"Laurin Wischounig","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wischounig, Laurin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130225432","display_name":"Abdelrahman Abdallah","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Abdallah, Abdelrahman","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5130226687","display_name":"Adam Jatowt","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jatowt, Adam","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5130236318"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.16359999775886536,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.16359999775886536,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13274","display_name":"Expert finding and Q&A systems","score":0.15649999678134918,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.11699999868869781,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.4975999891757965},{"id":"https://openalex.org/keywords/taxonomy","display_name":"Taxonomy (biology)","score":0.4864000082015991},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.47269999980926514},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.43540000915527344},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4325999915599823},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.4163999855518341},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.3467000126838684}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6934999823570251},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6269000172615051},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.4975999891757965},{"id":"https://openalex.org/C58642233","wikidata":"https://www.wikidata.org/wiki/Q8269924","display_name":"Taxonomy (biology)","level":2,"score":0.4864000082015991},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.47269999980926514},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.45080000162124634},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.43540000915527344},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4325999915599823},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.4163999855518341},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3978999853134155},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3467000126838684},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.3012999892234802},{"id":"https://openalex.org/C2982736386","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Statistical learning","level":2,"score":0.28290000557899475},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.28130000829696655},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.27559998631477356},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.27239999175071716},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.265500009059906},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.260699987411499}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.18005","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.18005","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.18005","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.18005","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.5179617404937744,"display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Information":[0],"Retrieval":[1],"(IR)":[2],"is":[3,38,67],"fundamental":[4],"to":[5,19,30,111],"many":[6],"modern":[7,71],"NLP":[8,72],"applications.":[9],"The":[10,120],"rise":[11],"of":[12,41,57,77,134],"dense":[13,33,62],"retrieval":[14],"(DR),":[15],"using":[16],"neural":[17],"networks":[18],"learn":[20],"semantic":[21],"vector":[22],"representations,":[23],"has":[24],"significantly":[25],"advanced":[26],"IR":[27],"performance.":[28],"Central":[29],"training":[31],"effective":[32],"retrievers":[34],"through":[35],"contrastive":[36],"learning":[37],"the":[39,68,75,132],"selection":[40],"informative":[42],"negative":[43,58],"samples.":[44],"Synthesizing":[45],"35":[46],"seminal":[47],"papers,":[48],"this":[49],"survey":[50,121],"provides":[51],"a":[52,92],"comprehensive":[53],"and":[54,74,101,117,127],"up-to-date":[55],"overview":[56],"sampling":[59],"techniques":[60,96],"in":[61,87],"IR.":[63],"Our":[64],"unique":[65],"contribution":[66],"focus":[69],"on":[70],"applications":[73],"inclusion":[76],"recent":[78],"Large":[79],"Language":[80],"Model":[81],"(LLM)-driven":[82],"methods,":[83],"an":[84],"area":[85],"absent":[86],"prior":[88],"reviews.":[89],"We":[90,104],"propose":[91],"taxonomy":[93],"that":[94],"categorizes":[95],"including":[97],"random,":[98],"static/dynamically":[99],"mined,":[100],"synthetic":[102,136],"datasets.":[103],"then":[105],"analyze":[106],"these":[107],"approaches":[108],"with":[109],"respect":[110],"trade-offs":[112],"between":[113],"effectiveness,":[114],"computational":[115],"cost,":[116],"implementation":[118],"difficulty.":[119],"concludes":[122],"by":[123],"outlining":[124],"current":[125],"challenges":[126],"promising":[128],"future":[129],"directions":[130],"for":[131],"use":[133],"LLM-generated":[135],"data.":[137]},"counts_by_year":[],"updated_date":"2026-03-21T06:36:02.116451","created_date":"2026-03-21T00:00:00"}
