{"id":"https://openalex.org/W4416037035","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.303","title":"QualBench: Benchmarking Chinese LLMs with Localized Professional Qualifications for Vertical Domain Evaluation","display_name":"QualBench: Benchmarking Chinese LLMs with Localized Professional Qualifications for Vertical Domain Evaluation","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4416037035","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.303"},"language":"en","primary_location":{"id":"doi:10.18653/v1/2025.emnlp-main.303","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.303","pdf_url":"https://aclanthology.org/2025.emnlp-main.303.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.emnlp-main.303.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5092758415","display_name":"Mengze Hong","orcid":"https://orcid.org/0009-0003-3188-4208"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Mengze Hong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113418406","display_name":"Wailing Ng","orcid":"https://orcid.org/0000-0002-4932-7499"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wailing Ng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100374113","display_name":"Chen Zhang","orcid":"https://orcid.org/0000-0002-3306-9317"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen Jason Zhang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100685763","display_name":"Di Jiang","orcid":"https://orcid.org/0000-0002-1950-2292"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Di Jiang","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5092758415"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.17575103,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"5949","last_page":"5964"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.24629999697208405,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.24629999697208405,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.1858000010251999,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.1770000010728836,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.6948999762535095},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5328999757766724},{"id":"https://openalex.org/keywords/empirical-research","display_name":"Empirical research","score":0.33970001339912415},{"id":"https://openalex.org/keywords/china","display_name":"China","score":0.30869999527931213},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.26809999346733093}],"concepts":[{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.6948999762535095},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5328999757766724},{"id":"https://openalex.org/C144133560","wikidata":"https://www.wikidata.org/wiki/Q4830453","display_name":"Business","level":0,"score":0.41370001435279846},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.388700008392334},{"id":"https://openalex.org/C121955636","wikidata":"https://www.wikidata.org/wiki/Q4116214","display_name":"Accounting","level":1,"score":0.3635999858379364},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.33970001339912415},{"id":"https://openalex.org/C39549134","wikidata":"https://www.wikidata.org/wiki/Q133080","display_name":"Public relations","level":1,"score":0.32659998536109924},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.32010000944137573},{"id":"https://openalex.org/C191935318","wikidata":"https://www.wikidata.org/wiki/Q148","display_name":"China","level":2,"score":0.30869999527931213},{"id":"https://openalex.org/C110354214","wikidata":"https://www.wikidata.org/wiki/Q6314146","display_name":"Engineering management","level":1,"score":0.3012999892234802},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.2881999909877777},{"id":"https://openalex.org/C162853370","wikidata":"https://www.wikidata.org/wiki/Q39809","display_name":"Marketing","level":1,"score":0.2874999940395355},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.27379998564720154},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.26809999346733093},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.266400009393692},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.25270000100135803},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.25130000710487366}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.18653/v1/2025.emnlp-main.303","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.303","pdf_url":"https://aclanthology.org/2025.emnlp-main.303.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},{"id":"pmh:oai:ira.lib.polyu.edu.hk:10397/116360","is_oa":true,"landing_page_url":"http://hdl.handle.net/10397/116360","pdf_url":"http://ira.lib.polyu.edu.hk/bitstream/10397/116360/1/2025.emnlp-main.303.pdf","source":{"id":"https://openalex.org/S4306400205","display_name":"PolyU Institutional Research Archive (Hong Kong Polytechnic University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I14243506","host_organization_name":"Hong Kong Polytechnic University","host_organization_lineage":["https://openalex.org/I14243506"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Conference Paper"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.emnlp-main.303","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.303","pdf_url":"https://aclanthology.org/2025.emnlp-main.303.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5058124604","display_name":null,"funder_award_id":"P0048887","funder_id":"https://openalex.org/F4320326427","funder_display_name":"Innovation and Technology Fund"}],"funders":[{"id":"https://openalex.org/F4320322598","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98"},{"id":"https://openalex.org/F4320326427","display_name":"Innovation and Technology Fund","ror":null}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416037035.pdf","grobid_xml":"https://content.openalex.org/works/W4416037035.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"rapid":[1],"advancement":[2],"of":[3,55,84,103,113,140],"Chinese":[4,28,48,56,70,85],"LLMs":[5,86],"underscores":[6],"the":[7,27,45,92,96,101,116,138],"need":[8],"for":[9,37,148],"vertical-domain":[10],"evaluations":[11],"to":[12,52,72],"ensure":[13],"reliable":[14],"applications.However,":[15],"existing":[16],"benchmarks":[17],"often":[18],"lack":[19],"domain":[20,105,120],"coverage":[21,121],"and":[22,77,136,143,154],"provide":[23],"limited":[24],"insights":[25],"into":[26],"working":[29],"context.Leveraging":[30],"qualification":[31,109],"exams":[32],"as":[33],"a":[34],"unified":[35],"framework":[36],"expertise":[38],"evaluation,":[39],"we":[40,125],"introduce":[41],"QualBench":[42],"1":[43],",":[44],"first":[46],"multi-domain":[47,152],"QA":[49],"benchmark":[50],"dedicated":[51],"localized":[53,104],"assessment":[54],"LLMs.The":[57],"dataset":[58],"includes":[59],"over":[60],"17,000":[61],"questions":[62],"across":[63],"six":[64],"vertical":[65],"domains,":[66],"drawn":[67],"from":[68],"24":[69],"qualifications":[71],"align":[73],"with":[74,91],"national":[75],"policies":[76],"professional":[78],"standards.Results":[79],"reveal":[80],"an":[81],"interesting":[82],"pattern":[83],"consistently":[87],"surpassing":[88],"non-Chinese":[89],"models,":[90],"Qwen2.5":[93],"model":[94,123,144],"outperforming":[95],"more":[97],"advanced":[98],"GPT-4o,":[99],"emphasizing":[100],"value":[102],"knowledge":[106],"in":[107,119],"meeting":[108],"requirements.The":[110],"average":[111],"accuracy":[112],"53.98%":[114],"reveals":[115],"current":[117],"gaps":[118],"within":[122],"capabilities.Furthermore,":[124],"identify":[126],"performance":[127],"degradation":[128],"caused":[129],"by":[130],"LLM":[131],"crowdsourcing,":[132],"assess":[133],"data":[134],"contamination,":[135],"illustrate":[137],"effectiveness":[139],"prompt":[141],"engineering":[142],"fine-tuning,":[145],"suggesting":[146],"opportunities":[147],"future":[149],"improvements":[150],"through":[151],"RAG":[153],"Federated":[155],"Learning.":[156]},"counts_by_year":[],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-11-08T00:00:00"}
