{"id":"https://openalex.org/W4403487570","doi":"https://doi.org/10.3233/faia240753","title":"Distilling the Effects of Language Model Contamination","display_name":"Distilling the Effects of Language Model Contamination","publication_year":2024,"publication_date":"2024-10-16","ids":{"openalex":"https://openalex.org/W4403487570","doi":"https://doi.org/10.3233/faia240753"},"language":"en","primary_location":{"id":"doi:10.3233/faia240753","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia240753","pdf_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/FAIA240753","source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/FAIA240753","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5092966916","display_name":"Behzad Mehrbakhsh","orcid":"https://orcid.org/0000-0001-9017-989X"},"institutions":[{"id":"https://openalex.org/I60053951","display_name":"Universitat Polit\u00e8cnica de Val\u00e8ncia","ror":"https://ror.org/01460j859","country_code":"ES","type":"education","lineage":["https://openalex.org/I60053951"]},{"id":"https://openalex.org/I4210131846","display_name":"Artificial Intelligence Research Institute","ror":"https://ror.org/03c0ach84","country_code":"ES","type":"facility","lineage":["https://openalex.org/I134820265","https://openalex.org/I4210131846"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Behzad Mehrbakhsh","raw_affiliation_strings":["UPV - Universitat Polit\u00e8cnica de Val\u00e8ncia","VRAIN - Valencian Research Institute for Artificial Intelligence","ValgrAI - Valencian Graduate School and Research Network of Artificial Intelligence"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"UPV - Universitat Polit\u00e8cnica de Val\u00e8ncia","institution_ids":["https://openalex.org/I60053951"]},{"raw_affiliation_string":"VRAIN - Valencian Research Institute for Artificial Intelligence","institution_ids":["https://openalex.org/I4210131846"]},{"raw_affiliation_string":"ValgrAI - Valencian Graduate School and Research Network of Artificial Intelligence","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051725651","display_name":"Fernando Mart\u00ednez\u2010Plumed","orcid":"https://orcid.org/0000-0003-2902-6477"},"institutions":[{"id":"https://openalex.org/I60053951","display_name":"Universitat Polit\u00e8cnica de Val\u00e8ncia","ror":"https://ror.org/01460j859","country_code":"ES","type":"education","lineage":["https://openalex.org/I60053951"]},{"id":"https://openalex.org/I4210131846","display_name":"Artificial Intelligence Research Institute","ror":"https://ror.org/03c0ach84","country_code":"ES","type":"facility","lineage":["https://openalex.org/I134820265","https://openalex.org/I4210131846"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Fernando Mart\u00ednez-Plumed","raw_affiliation_strings":["UPV - Universitat Polit\u00e8cnica de Val\u00e8ncia","VRAIN - Valencian Research Institute for Artificial Intelligence"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"UPV - Universitat Polit\u00e8cnica de Val\u00e8ncia","institution_ids":["https://openalex.org/I60053951"]},{"raw_affiliation_string":"VRAIN - Valencian Research Institute for Artificial Intelligence","institution_ids":["https://openalex.org/I4210131846"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5029864546","display_name":"Jos\u00e9 Hern\u00e1ndez\u2010Orallo","orcid":"https://orcid.org/0000-0001-9746-7632"},"institutions":[{"id":"https://openalex.org/I60053951","display_name":"Universitat Polit\u00e8cnica de Val\u00e8ncia","ror":"https://ror.org/01460j859","country_code":"ES","type":"education","lineage":["https://openalex.org/I60053951"]},{"id":"https://openalex.org/I4210131846","display_name":"Artificial Intelligence Research Institute","ror":"https://ror.org/03c0ach84","country_code":"ES","type":"facility","lineage":["https://openalex.org/I134820265","https://openalex.org/I4210131846"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Jos\u00e9 Hern\u00e1ndez-Orallo","raw_affiliation_strings":["UPV - Universitat Polit\u00e8cnica de Val\u00e8ncia","VRAIN - Valencian Research Institute for Artificial Intelligence","ValgrAI - Valencian Graduate School and Research Network of Artificial Intelligence"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"UPV - Universitat Polit\u00e8cnica de Val\u00e8ncia","institution_ids":["https://openalex.org/I60053951"]},{"raw_affiliation_string":"VRAIN - Valencian Research Institute for Artificial Intelligence","institution_ids":["https://openalex.org/I4210131846"]},{"raw_affiliation_string":"ValgrAI - Valencian Graduate School and Research Network of Artificial Intelligence","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5092966916"],"corresponding_institution_ids":["https://openalex.org/I4210131846","https://openalex.org/I60053951"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.38199688,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.2442999929189682,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.2442999929189682,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/contamination","display_name":"Contamination","score":0.7952601909637451},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4468390941619873},{"id":"https://openalex.org/keywords/environmental-science","display_name":"Environmental science","score":0.33920273184776306},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.08623278141021729},{"id":"https://openalex.org/keywords/ecology","display_name":"Ecology","score":0.05307820439338684}],"concepts":[{"id":"https://openalex.org/C112570922","wikidata":"https://www.wikidata.org/wiki/Q60528603","display_name":"Contamination","level":2,"score":0.7952601909637451},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4468390941619873},{"id":"https://openalex.org/C39432304","wikidata":"https://www.wikidata.org/wiki/Q188847","display_name":"Environmental science","level":0,"score":0.33920273184776306},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.08623278141021729},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.05307820439338684}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3233/faia240753","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia240753","pdf_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/FAIA240753","source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},{"id":"pmh:oai:riunet.upv.es:10251/234434","is_oa":true,"landing_page_url":"https://riunet.upv.es/handle/10251/234434","pdf_url":null,"source":{"id":"https://openalex.org/S4306400639","display_name":"RiuNet (Universitat Polit\u00e8cnica de Val\u00e8ncia)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I60053951","host_organization_name":"Universitat Polit\u00e8cnica de Val\u00e8ncia","host_organization_lineage":["https://openalex.org/I60053951"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"doi:10.3233/faia240753","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia240753","pdf_url":"https://ebooks.iospress.nl/pdf/doi/10.3233/FAIA240753","source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.8199999928474426}],"awards":[{"id":"https://openalex.org/G1411269697","display_name":null,"funder_award_id":"CIPROM/2022/6","funder_id":"https://openalex.org/F4320321864","funder_display_name":"Generalitat Valenciana"},{"id":"https://openalex.org/G1622782223","display_name":null,"funder_award_id":"MCIN/AEI/10","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"},{"id":"https://openalex.org/G1795948940","display_name":null,"funder_award_id":"952215","funder_id":"https://openalex.org/F4320321864","funder_display_name":"Generalitat Valenciana"},{"id":"https://openalex.org/G1939980667","display_name":null,"funder_award_id":"RTI2018-094403-B-C32","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"},{"id":"https://openalex.org/G3480869486","display_name":null,"funder_award_id":"13039","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G3492592289","display_name":null,"funder_award_id":"PID2021-122830OB-C42","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"},{"id":"https://openalex.org/G4909719005","display_name":null,"funder_award_id":"PID2021-122830OB-C42","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G5080475149","display_name":null,"funder_award_id":"10.13039","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G6298273575","display_name":null,"funder_award_id":"RTI2018","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"},{"id":"https://openalex.org/G6756106972","display_name":null,"funder_award_id":"MCIN/AEI/10","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G7043688574","display_name":null,"funder_award_id":"PID2021-122830OB-C42","funder_id":"https://openalex.org/F4320321864","funder_display_name":"Generalitat Valenciana"},{"id":"https://openalex.org/G7084143925","display_name":null,"funder_award_id":"AEI/10","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G7535663061","display_name":null,"funder_award_id":"AEI/10.","funder_id":"https://openalex.org/F4320335598","funder_display_name":"Agencia Estatal de Investigaci\u00f3n"},{"id":"https://openalex.org/G7780944301","display_name":null,"funder_award_id":"RFP2-152","funder_id":"https://openalex.org/F4320317463","funder_display_name":"Future of Life Institute"}],"funders":[{"id":"https://openalex.org/F4320317463","display_name":"Future of Life Institute","ror":null},{"id":"https://openalex.org/F4320321864","display_name":"Generalitat Valenciana","ror":"https://ror.org/0097mvx21"},{"id":"https://openalex.org/F4320335322","display_name":"European Regional Development Fund","ror":"https://ror.org/00k4n6c32"},{"id":"https://openalex.org/F4320335598","display_name":"Agencia Estatal de Investigaci\u00f3n","ror":null}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4403487570.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W4246525302","https://openalex.org/W2355749553","https://openalex.org/W2327028314","https://openalex.org/W2070278412","https://openalex.org/W281735054","https://openalex.org/W2565462584","https://openalex.org/W2390279801"],"abstract_inverted_index":{"The":[0],"proportion":[1],"of":[2,8,36,39,50,86,98],"AI-generated":[3],"content":[4],"permeating":[5],"the":[6,34,48,54,96,116,120,123],"well":[7],"knowledge":[9],"is":[10,29],"increasing":[11],"significantly.":[12],"Large":[13],"language":[14,66],"models":[15,67],"(LLMs)":[16],"contribute":[17],"to":[18,31],"that":[19],"contamination":[20],"but":[21],"they":[22],"also":[23],"suffer":[24],"from":[25,62,64],"it.":[26],"However,":[27],"it":[28,42],"yet":[30],"be":[32,41,128],"clarified":[33],"effect":[35],"different":[37,84],"sources":[38],"error,":[40,51],"human-generated":[43,110],"or":[44,68,75],"LLM-generated.":[45],"Controlling":[46],"for":[47,88],"percentage":[49],"we":[52,81,103],"explore":[53],"impact":[55],"on":[56],"LLM":[57],"fine-tuning":[58],"when":[59],"errors":[60,99],"come":[61],"humans,":[63],"other":[65],"are":[69],"generated":[70],"randomly":[71],"using":[72],"an":[73],"aleatoric":[74],"epistemic":[76],"source.":[77],"In":[78],"this":[79],"paper,":[80],"compare":[82],"these":[83],"types":[85],"error":[87],"in-distribution":[89,109],"and":[90,100],"out-of-distribution":[91,121],"experimental":[92],"settings.":[93],"By":[94],"analysing":[95],"levels":[97],"their":[101],"distribution,":[102],"find":[104],"a":[105],"nuanced":[106],"view:":[107],"while":[108],"noise":[111,125],"seems":[112],"more":[113],"benign":[114],"than":[115],"LLM-generated":[117],"counterpart,":[118],"in":[119],"case":[122],"model-generated":[124],"may":[126],"not":[127],"necessarily":[129],"worse.":[130]},"counts_by_year":[],"updated_date":"2026-04-25T08:17:42.794288","created_date":"2025-10-10T00:00:00"}
