{"id":"https://openalex.org/W7130558586","doi":"https://doi.org/10.1109/fllm67465.2025.11390942","title":"Analysis of Linguistic Effects of Self-Consuming Training","display_name":"Analysis of Linguistic Effects of Self-Consuming Training","publication_year":2025,"publication_date":"2025-11-25","ids":{"openalex":"https://openalex.org/W7130558586","doi":"https://doi.org/10.1109/fllm67465.2025.11390942"},"language":null,"primary_location":{"id":"doi:10.1109/fllm67465.2025.11390942","is_oa":false,"landing_page_url":"https://doi.org/10.1109/fllm67465.2025.11390942","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 3rd International Conference on Foundation and Large Language Models (FLLM)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5126386495","display_name":"Veronika Grigoreva","orcid":null},"institutions":[{"id":"https://openalex.org/I204722609","display_name":"Queen's University","ror":"https://ror.org/02y72wh86","country_code":"CA","type":"education","lineage":["https://openalex.org/I204722609"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Veronika Grigoreva","raw_affiliation_strings":["Queen&#x2019;s University,School of Computing,Kingston,Canada"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Queen&#x2019;s University,School of Computing,Kingston,Canada","institution_ids":["https://openalex.org/I204722609"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054896491","display_name":"Catherine Stinson","orcid":"https://orcid.org/0000-0003-2770-7922"},"institutions":[{"id":"https://openalex.org/I204722609","display_name":"Queen's University","ror":"https://ror.org/02y72wh86","country_code":"CA","type":"education","lineage":["https://openalex.org/I204722609"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Catherine Stinson","raw_affiliation_strings":["Queen&#x2019;s University,School of Computing,Department of Philosophy,Kingston,Canada"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Queen&#x2019;s University,School of Computing,Department of Philosophy,Kingston,Canada","institution_ids":["https://openalex.org/I204722609"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5126407720","display_name":"Christian Muise","orcid":null},"institutions":[{"id":"https://openalex.org/I204722609","display_name":"Queen's University","ror":"https://ror.org/02y72wh86","country_code":"CA","type":"education","lineage":["https://openalex.org/I204722609"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Christian Muise","raw_affiliation_strings":["Queen&#x2019;s University,School of Computing,Kingston,Canada"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Queen&#x2019;s University,School of Computing,Kingston,Canada","institution_ids":["https://openalex.org/I204722609"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.74643853,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1196","last_page":"1202"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12488","display_name":"Mental Health via Writing","score":0.20550000667572021,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12488","display_name":"Mental Health via Writing","score":0.20550000667572021,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.11980000138282776,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.1096000000834465,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.5192999839782715},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.5091000199317932},{"id":"https://openalex.org/keywords/affect","display_name":"Affect (linguistics)","score":0.44179999828338623},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.438400000333786},{"id":"https://openalex.org/keywords/language-acquisition","display_name":"Language acquisition","score":0.41679999232292175},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4090999960899353},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.3560999929904938}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5679000020027161},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.5558000206947327},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.5192999839782715},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5170000195503235},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.5091000199317932},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4738999903202057},{"id":"https://openalex.org/C2776035688","wikidata":"https://www.wikidata.org/wiki/Q1606558","display_name":"Affect (linguistics)","level":2,"score":0.44179999828338623},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.438400000333786},{"id":"https://openalex.org/C74672266","wikidata":"https://www.wikidata.org/wiki/Q815859","display_name":"Language acquisition","level":2,"score":0.41679999232292175},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.4131999909877777},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4090999960899353},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.3560999929904938},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.35589998960494995},{"id":"https://openalex.org/C2778355321","wikidata":"https://www.wikidata.org/wiki/Q17079427","display_name":"Identity (music)","level":2,"score":0.3495999872684479},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.3249000012874603},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.32260000705718994},{"id":"https://openalex.org/C2779313563","wikidata":"https://www.wikidata.org/wiki/Q17072565","display_name":"On Language","level":2,"score":0.30379998683929443},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.290800005197525},{"id":"https://openalex.org/C2983448237","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Language understanding","level":2,"score":0.274399995803833},{"id":"https://openalex.org/C133462117","wikidata":"https://www.wikidata.org/wiki/Q4929239","display_name":"Data collection","level":2,"score":0.25839999318122864},{"id":"https://openalex.org/C172205122","wikidata":"https://www.wikidata.org/wiki/Q777864","display_name":"Linguistic description","level":2,"score":0.25760000944137573}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/fllm67465.2025.11390942","is_oa":false,"landing_page_url":"https://doi.org/10.1109/fllm67465.2025.11390942","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 3rd International Conference on Foundation and Large Language Models (FLLM)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.8263529539108276,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":1,"referenced_works":["https://openalex.org/W4385245566"],"related_works":[],"abstract_inverted_index":{"Modern":[0],"language":[1,37,129],"models":[2],"train":[3],"on":[4,29,40,78,114,127],"vast":[5],"arrays":[6],"of":[7,26,34,43,101],"data":[8,16],"scraped":[9],"from":[10],"the":[11,14,23,30,32,41,54,72,84,102],"internet,":[12,31],"with":[13],"text":[15,92],"itself":[17],"often":[18],"only":[19],"lightly":[20],"filtered.":[21],"Considering":[22],"increasing":[24],"amounts":[25],"machine-generated":[27],"texts":[28],"possibility":[33],"a":[35,75],"new":[36],"model":[38],"training":[39,65,124],"outputs":[42,85],"previous":[44,50],"generations":[45],"is":[46],"exceedingly":[47],"high.":[48],"Following":[49],"work,":[51],"we":[52,69,82,117],"use":[53],"term":[55],"\"self-consuming":[56],"training\"":[57],"for":[58],"this":[59],"process.":[60],"To":[61],"analyse":[62],"how":[63],"self-consuming":[64,123],"might":[66,125],"affect":[67],"LLMs,":[68],"repeatedly":[70],"simulate":[71],"process":[73],"using":[74,105],"GPT-2-based":[76],"LLM":[77],"several":[79],"datasets.":[80],"Afterwards,":[81],"score":[83],"across":[86],"multiple":[87],"attributes,":[88],"including":[89],"quality,":[90],"overall":[91],"diversity,":[93],"as":[94,96],"well":[95],"emotion,":[97],"toxicity,":[98],"perceived":[99],"identity":[100],"author,":[103],"etc.,":[104],"both":[106],"established":[107],"metrics":[108],"and":[109],"fine-tuned":[110],"classifier":[111],"models.":[112,130],"Based":[113],"these":[115],"scores,":[116],"draw":[118],"out":[119],"some":[120],"potential":[121],"effects":[122],"have":[126],"modern":[128]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-02-20T00:00:00"}
