{"id":"https://openalex.org/W4401863498","doi":"https://doi.org/10.1145/3637528.3671882","title":"Neural Retrievers are Biased Towards LLM-Generated Content","display_name":"Neural Retrievers are Biased Towards LLM-Generated Content","publication_year":2024,"publication_date":"2024-08-24","ids":{"openalex":"https://openalex.org/W4401863498","doi":"https://doi.org/10.1145/3637528.3671882"},"language":"en","primary_location":{"id":"doi:10.1145/3637528.3671882","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3637528.3671882","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5075518954","display_name":"Sunhao Dai","orcid":"https://orcid.org/0009-0002-7549-0860"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Sunhao Dai","raw_affiliation_strings":["Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102010069","display_name":"Yuqi Zhou","orcid":"https://orcid.org/0009-0008-2453-9138"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuqi Zhou","raw_affiliation_strings":["Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004759804","display_name":"Liang Pang","orcid":"https://orcid.org/0000-0003-1161-8546"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liang Pang","raw_affiliation_strings":["CAS Key Laboratory of AI Safety Institute of Computing Technology Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"CAS Key Laboratory of AI Safety Institute of Computing Technology Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210090176","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102612071","display_name":"Weihao Liu","orcid":"https://orcid.org/0009-0003-0481-0246"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weihao Liu","raw_affiliation_strings":["Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103895382","display_name":"Xiaolin Hu","orcid":"https://orcid.org/0009-0002-5493-5779"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaolin Hu","raw_affiliation_strings":["Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028071674","display_name":"Yong Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yong Liu","raw_affiliation_strings":["Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100320847","display_name":"Xiao Zhang","orcid":"https://orcid.org/0000-0001-7397-5632"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiao Zhang","raw_affiliation_strings":["Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002333577","display_name":"Gang Wang","orcid":"https://orcid.org/0000-0002-8795-8953"},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Gang Wang","raw_affiliation_strings":["Noah's Ark Lab, Huawei, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Noah's Ark Lab, Huawei, Shenzhen, China","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5020766468","display_name":"Jun Xu","orcid":"https://orcid.org/0000-0001-7170-111X"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Xu","raw_affiliation_strings":["Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Gaoling School of Artificial Intelligence, Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5075518954"],"corresponding_institution_ids":["https://openalex.org/I78988378"],"apc_list":null,"apc_paid":null,"fwci":9.0354,"has_fulltext":false,"cited_by_count":26,"citation_normalized_percentile":{"value":0.9822974,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"526","last_page":"537"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9829000234603882,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6148236393928528},{"id":"https://openalex.org/keywords/content","display_name":"Content (measure theory)","score":0.45938780903816223},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.07789525389671326}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6148236393928528},{"id":"https://openalex.org/C2778152352","wikidata":"https://www.wikidata.org/wiki/Q5165061","display_name":"Content (measure theory)","level":2,"score":0.45938780903816223},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.07789525389671326},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3637528.3671882","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3637528.3671882","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W8870360","https://openalex.org/W1975570633","https://openalex.org/W2098502158","https://openalex.org/W2142537246","https://openalex.org/W2144211451","https://openalex.org/W2187089797","https://openalex.org/W2750779823","https://openalex.org/W2912924812","https://openalex.org/W2922386288","https://openalex.org/W3007780968","https://openalex.org/W3008374555","https://openalex.org/W3134665270","https://openalex.org/W3137305332","https://openalex.org/W3154670582","https://openalex.org/W3166731436","https://openalex.org/W4213009331","https://openalex.org/W4252076394","https://openalex.org/W4293304858","https://openalex.org/W4298110867","https://openalex.org/W4312943126","https://openalex.org/W4368755500","https://openalex.org/W4385570537","https://openalex.org/W4385571551","https://openalex.org/W4385638369","https://openalex.org/W4389524022","https://openalex.org/W4392669753"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W2382290278","https://openalex.org/W4395014643"],"abstract_inverted_index":{"Recently,":[0],"the":[1,10,29,37,45,61,68,124,128,141,148,156,184,195,207,213,229,241,244],"emergence":[2],"of":[3,12,25,86,118,158,238],"large":[4],"language":[5],"models":[6,88,106,178],"(LLMs)":[7],"has":[8],"revolutionized":[9],"paradigm":[11],"information":[13],"retrieval":[14,105,177],"(IR)":[15],"applications,":[16],"especially":[17],"in":[18,36,89,120,240],"web":[19],"search,":[20],"by":[21,53,60],"generating":[22],"vast":[23],"amounts":[24],"human-like":[26],"texts":[27,96,164],"on":[28],"Internet.":[30],"As":[31],"a":[32,42,72,83,190,224],"result,":[33],"IR":[34,69,87,230,239],"systems":[35,70],"LLM":[38,242],"era":[39],"are":[40,48,97,249],"facing":[41],"new":[43,247],"challenge:":[44],"indexed":[46],"documents":[47,66,111],"now":[49],"not":[50,138],"only":[51],"written":[52],"human":[54],"beings":[55],"but":[56,145],"also":[57,188],"automatically":[58],"generated":[59],"LLMs.":[62],"How":[63],"these":[64],"LLM-generated":[65,95,110,125,163],"influence":[67],"is":[71,137],"pressing":[73],"and":[74,94,198,217,232],"still":[75],"unexplored":[76],"question.":[77],"In":[78],"this":[79,116,135],"work,":[80],"we":[81,132,187,205],"conduct":[82],"quantitative":[84],"evaluation":[85],"scenarios":[90],"where":[91],"both":[92],"human-written":[93],"involved.":[98],"Surprisingly,":[99],"our":[100,219],"findings":[101,220],"indicate":[102,161],"that":[103,134,162],"neural":[104,121,143,150,176],"tend":[107],"to":[108,115,140,147,179,228],"rank":[109],"higher.":[112],"We":[113],"refer":[114],"category":[117],"biases":[119],"retrievers":[122],"towards":[123],"content":[126],"as":[127,223],"source":[129,185,215],"bias.":[130],"Moreover,":[131],"discover":[133],"bias":[136,216],"confined":[139],"first-stage":[142],"retrievers,":[144],"extends":[146],"second-stage":[149],"re-rankers.":[151],"Then,":[152],"in-depth":[153],"analyses":[154],"from":[155,212],"perspective":[157],"text":[159],"compression":[160],"exhibit":[165],"more":[166],"focused":[167],"semantics":[168],"with":[169],"less":[170],"noise,":[171],"making":[172],"it":[173],"easier":[174],"for":[175,194],"semantic":[180],"match.":[181],"To":[182,234],"mitigate":[183],"bias,":[186],"propose":[189],"plug-and-play":[191],"debiased":[192],"constraint":[193],"optimization":[196],"objective,":[197],"experimental":[199],"results":[200],"show":[201],"its":[202],"effectiveness.":[203],"Finally,":[204],"discuss":[206],"potential":[208],"severe":[209],"concerns":[210],"stemming":[211],"observed":[214],"hope":[218],"can":[221],"serve":[222],"critical":[225],"wake-up":[226],"call":[227],"community":[231],"beyond.":[233],"facilitate":[235],"future":[236],"explorations":[237],"era,":[243],"constructed":[245],"two":[246],"benchmarks":[248],"available":[250],"at":[251],"https://github.com/KID-22/Source-Bias.":[252]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":19},{"year":2024,"cited_by_count":3}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
