{"id":"https://openalex.org/W4400531953","doi":"https://doi.org/10.1145/3626772.3657878","title":"C-Pack: Packed Resources For General Chinese Embeddings","display_name":"C-Pack: Packed Resources For General Chinese Embeddings","publication_year":2024,"publication_date":"2024-07-10","ids":{"openalex":"https://openalex.org/W4400531953","doi":"https://doi.org/10.1145/3626772.3657878"},"language":"en","primary_location":{"id":"doi:10.1145/3626772.3657878","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3626772.3657878","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5044147794","display_name":"Shitao Xiao","orcid":"https://orcid.org/0000-0003-2567-6843"},"institutions":[{"id":"https://openalex.org/I4210086710","display_name":"Beijing Academy of Social Sciences","ror":"https://ror.org/003ayef58","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210086710"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shitao Xiao","raw_affiliation_strings":["Beijing Academy of AI, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing Academy of AI, Beijing, China","institution_ids":["https://openalex.org/I4210086710"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100423656","display_name":"Zheng Liu","orcid":"https://orcid.org/0000-0001-7765-8466"},"institutions":[{"id":"https://openalex.org/I4210086710","display_name":"Beijing Academy of Social Sciences","ror":"https://ror.org/003ayef58","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210086710"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zheng Liu","raw_affiliation_strings":["Beijing Academy of AI, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing Academy of AI, Beijing, China","institution_ids":["https://openalex.org/I4210086710"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074258457","display_name":"Peitian Zhang","orcid":"https://orcid.org/0009-0007-1926-7433"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peitian Zhang","raw_affiliation_strings":["Renmin University of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103309504","display_name":"Niklas Muennighoff","orcid":"https://orcid.org/0009-0001-7157-770X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Niklas Muennighoff","raw_affiliation_strings":["HuggingFace, Beijing, China"],"affiliations":[{"raw_affiliation_string":"HuggingFace, Beijing, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085254654","display_name":"Defu Lian","orcid":"https://orcid.org/0000-0002-3507-9607"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Defu Lian","raw_affiliation_strings":["UTSC, Hefei, China"],"affiliations":[{"raw_affiliation_string":"UTSC, Hefei, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5018977183","display_name":"Jian\u2010Yun Nie","orcid":"https://orcid.org/0000-0003-1556-3335"},"institutions":[{"id":"https://openalex.org/I70931966","display_name":"Universit\u00e9 de Montr\u00e9al","ror":"https://ror.org/0161xgx34","country_code":"CA","type":"education","lineage":["https://openalex.org/I70931966"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Jian-Yun Nie","raw_affiliation_strings":["University of Montreal, Montreal, Canada"],"affiliations":[{"raw_affiliation_string":"University of Montreal, Montreal, Canada","institution_ids":["https://openalex.org/I70931966"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5044147794"],"corresponding_institution_ids":["https://openalex.org/I4210086710"],"apc_list":null,"apc_paid":null,"fwci":76.9664,"has_fulltext":false,"cited_by_count":229,"citation_normalized_percentile":{"value":0.99952257,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"641","last_page":"649"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.988099992275238,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9855999946594238,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/packed-bed","display_name":"Packed bed","score":0.5543255805969238},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4795655608177185},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.2591403126716614},{"id":"https://openalex.org/keywords/chromatography","display_name":"Chromatography","score":0.16800904273986816}],"concepts":[{"id":"https://openalex.org/C172331833","wikidata":"https://www.wikidata.org/wiki/Q751029","display_name":"Packed bed","level":2,"score":0.5543255805969238},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4795655608177185},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.2591403126716614},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.16800904273986816}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3626772.3657878","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3626772.3657878","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 47th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W2126400076","https://openalex.org/W2903314293","https://openalex.org/W2963957489","https://openalex.org/W3027879771","https://openalex.org/W3169113923","https://openalex.org/W4221159373","https://openalex.org/W4284678164","https://openalex.org/W4284685333","https://openalex.org/W4290877239","https://openalex.org/W6600001191","https://openalex.org/W6600538214","https://openalex.org/W6604957599","https://openalex.org/W6893809404"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2524409732","https://openalex.org/W4396696052","https://openalex.org/W2382290278"],"abstract_inverted_index":{"We":[0,97],"introduce":[1],"C-Pack,":[2],"a":[3,26,53,69],"package":[4],"of":[5,12,39,46,71,94,105,162],"resources":[6,113,170],"that":[7],"significantly":[8],"advances":[9],"the":[10,37,44,92,95,102,136,149,158],"field":[11],"general":[13,115],"text":[14,31,58,83,126,166],"embeddings":[15,59,84],"for":[16,30,56,108,124,165],"Chinese.":[17],"C-Pack":[18],"includes":[19],"three":[20],"critical":[21],"resources.":[22],"1)":[23],"C-MTP":[24],"is":[25,34,52,68,144],"massive":[27],"training":[28,106,163],"dataset":[29],"embedding,":[32,117],"which":[33],"based":[35],"on":[36,85,114,135],"curation":[38],"vast":[40],"unlabeled":[41],"corpora":[42],"and":[43,63,100,122,154],"integration":[45],"high-quality":[47],"labeled":[48],"corpora.":[49],"2)":[50],"C-MTEB":[51,86],"comprehensive":[54],"benchmark":[55],"Chinese":[57,82,116,150,153],"covering":[60,74],"6":[61],"tasks":[62],"35":[64],"datasets.":[65],"3)":[66],"BGE":[67],"family":[70],"embedding":[72],"models":[73,78,123,130],"multiple":[75],"sizes.":[76],"Our":[77],"outperform":[79],"all":[80],"prior":[81],"by":[87],"more":[88],"than":[89,148],"+10%":[90],"upon":[91],"time":[93],"release.":[96],"also":[98,131],"integrate":[99],"optimize":[101],"entire":[103],"suite":[104],"methods":[107],"BGE.":[109],"Along":[110],"with":[111],"our":[112,120,140],"we":[118],"release":[119,161],"data":[121,143,164],"English":[125,129,142,155],"embeddings.":[127,167],"The":[128],"achieve":[132],"state-of-the-art":[133],"performance":[134],"MTEB":[137],"benchmark;":[138],"meanwhile,":[139],"released":[141],"2":[145],"times":[146],"larger":[147],"data.":[151],"Both":[152],"datasets":[156],"are":[157,171],"largest":[159],"public":[160],"All":[168],"these":[169],"made":[172],"publicly":[173],"available":[174],"at":[175],"https://github.com/FlagOpen/FlagEmbedding.":[176]},"counts_by_year":[{"year":2026,"cited_by_count":43},{"year":2025,"cited_by_count":152},{"year":2024,"cited_by_count":33},{"year":2023,"cited_by_count":1}],"updated_date":"2026-04-15T08:11:43.952461","created_date":"2025-10-10T00:00:00"}
