{"id":"https://openalex.org/W4408355147","doi":"https://doi.org/10.1109/icassp49660.2025.10888877","title":"A Weighted Cross-entropy Loss for Mitigating LLM Hallucinations in Cross-lingual Continual Pretraining","display_name":"A Weighted Cross-entropy Loss for Mitigating LLM Hallucinations in Cross-lingual Continual Pretraining","publication_year":2025,"publication_date":"2025-03-12","ids":{"openalex":"https://openalex.org/W4408355147","doi":"https://doi.org/10.1109/icassp49660.2025.10888877"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49660.2025.10888877","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10888877","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5007440397","display_name":"Yuantao Fan","orcid":"https://orcid.org/0000-0002-3034-6630"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuantao Fan","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,Beijing,China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068728736","display_name":"Ruifan Li","orcid":"https://orcid.org/0000-0002-3543-6272"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ruifan Li","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,Beijing,China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100643936","display_name":"Guangwei Zhang","orcid":"https://orcid.org/0000-0003-3164-9921"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guangwei Zhang","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,Beijing,China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100705849","display_name":"Chuan Shi","orcid":"https://orcid.org/0000-0002-3734-0266"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chuan Shi","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,Beijing,China","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100351289","display_name":"Xiaojie Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaojie Wang","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,Beijing,China","institution_ids":["https://openalex.org/I139759216"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.067,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.73758352,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13397","display_name":"Hallucinations in medical conditions","score":0.8216999769210815,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T13397","display_name":"Hallucinations in medical conditions","score":0.8216999769210815,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10949","display_name":"Genetic Neurodegenerative Diseases","score":0.7623000144958496,"subfield":{"id":"https://openalex.org/subfields/2804","display_name":"Cellular and Molecular Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cross-entropy","display_name":"Cross entropy","score":0.6892834901809692},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4975261986255646},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.4651435613632202},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3017171621322632},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.17219209671020508}],"concepts":[{"id":"https://openalex.org/C167981619","wikidata":"https://www.wikidata.org/wiki/Q1685498","display_name":"Cross entropy","level":3,"score":0.6892834901809692},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4975261986255646},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.4651435613632202},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3017171621322632},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.17219209671020508},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49660.2025.10888877","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10888877","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320330944","display_name":"Nature","ror":null},{"id":"https://openalex.org/F4320334111","display_name":"Innovation Fund","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W2154053567","https://openalex.org/W3155332104","https://openalex.org/W3201174429","https://openalex.org/W4285294723","https://openalex.org/W4385571039","https://openalex.org/W4385571157","https://openalex.org/W4385571224","https://openalex.org/W4385571376","https://openalex.org/W4385572072","https://openalex.org/W4385572119","https://openalex.org/W4388275401","https://openalex.org/W4389518784","https://openalex.org/W4389520044","https://openalex.org/W4389520703","https://openalex.org/W4389520749","https://openalex.org/W4389520788","https://openalex.org/W4398757454","https://openalex.org/W4399803256","https://openalex.org/W4401042689","https://openalex.org/W4402684121","https://openalex.org/W4411630296","https://openalex.org/W6757817989","https://openalex.org/W6778883912","https://openalex.org/W6781254577","https://openalex.org/W6782879696","https://openalex.org/W6783754597","https://openalex.org/W6838461927","https://openalex.org/W6840896190","https://openalex.org/W6849424676","https://openalex.org/W6852177738","https://openalex.org/W6852870998","https://openalex.org/W6854372151","https://openalex.org/W6855513646","https://openalex.org/W7073593060"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2994927414"],"abstract_inverted_index":{"Recently,":[0],"due":[1],"to":[2,72,82],"the":[3,60,74,112],"explosive":[4],"advances":[5],"of":[6,62,76,114],"large":[7],"language":[8],"models":[9],"(LLMs)":[10],"on":[11,88,104],"English,":[12],"cross-lingual":[13],"continual":[14,51],"pretraining":[15],"has":[16],"been":[17],"widely":[18],"applied":[19],"in":[20],"obtaining":[21,98],"Chinese":[22],"LLMs.":[23],"However,":[24],"previous":[25],"studies":[26],"showed":[27],"that":[28],"these":[29],"LLMs":[30],"have":[31],"suffered":[32],"severe":[33],"hallucinations,":[34],"mainly":[35],"caused":[36],"by":[37],"noisy":[38,63,77],"tokens.":[39,78],"To":[40],"this":[41],"aim,":[42],"we":[43],"propose":[44],"a":[45],"novel":[46],"loss":[47,55],"function,":[48],"InfoLoss":[49,81],"for":[50,95,107],"pretraining.":[52],"Specifically,":[53],"our":[54,115],"function":[56],"takes":[57],"into":[58],"account":[59],"co-occurrence":[61],"and":[64,67,120],"normal":[65],"tokens,":[66],"uses":[68],"point-wise":[69],"mutual":[70],"information":[71],"reduce":[73],"impact":[75],"We":[79,100],"use":[80],"continually":[83],"pretrain":[84],"30":[85],"billion":[86],"tokens":[87],"Llama":[89],"2-7B":[90],"with":[91],"64":[92],"A100":[93],"GPUs":[94],"24":[96],"days,":[97],"C-Llama.":[99],"then":[101],"conduct":[102],"experiments":[103],"12":[105],"benchmarks":[106],"evaluations.":[108],"The":[109],"results":[110],"show":[111],"effectiveness":[113],"proposed":[116],"InfoLoss.":[117],"Our":[118],"datasets":[119],"codes":[121],"are":[122],"publicly":[123],"available":[124],"at":[125],"https://github.com/Fluxation996/C-Llama.":[126]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
