{"id":"https://openalex.org/W4407450298","doi":"https://doi.org/10.1145/3716854","title":"The Evaluation Framework and Benchmark for Large Language Models in the Government Affairs Domain","display_name":"The Evaluation Framework and Benchmark for Large Language Models in the Government Affairs Domain","publication_year":2025,"publication_date":"2025-02-13","ids":{"openalex":"https://openalex.org/W4407450298","doi":"https://doi.org/10.1145/3716854"},"language":"en","primary_location":{"id":"doi:10.1145/3716854","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3716854","pdf_url":null,"source":{"id":"https://openalex.org/S2492086750","display_name":"ACM Transactions on Intelligent Systems and Technology","issn_l":"2157-6904","issn":["2157-6904","2157-6912"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Intelligent Systems and Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5000176042","display_name":"S. Liu","orcid":"https://orcid.org/0009-0008-6838-7965"},"institutions":[{"id":"https://openalex.org/I180662265","display_name":"China Mobile (China)","ror":"https://ror.org/05gftfe97","country_code":"CN","type":"company","lineage":["https://openalex.org/I180662265"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shuo Liu","raw_affiliation_strings":["China Mobile Research Institute, Beijing, China"],"affiliations":[{"raw_affiliation_string":"China Mobile Research Institute, Beijing, China","institution_ids":["https://openalex.org/I180662265"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100351907","display_name":"Lin Zhang","orcid":"https://orcid.org/0000-0003-0424-9965"},"institutions":[{"id":"https://openalex.org/I4210096250","display_name":"Beijing Institute of Big Data Research","ror":"https://ror.org/00s1sz824","country_code":"CN","type":"facility","lineage":["https://openalex.org/I20231570","https://openalex.org/I37796252","https://openalex.org/I4210096250"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lin Zhang","raw_affiliation_strings":["Beijing Big Data Centre, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing Big Data Centre, Beijing, China","institution_ids":["https://openalex.org/I4210096250"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111892513","display_name":"Weidong Liu","orcid":"https://orcid.org/0009-0009-1796-1397"},"institutions":[{"id":"https://openalex.org/I180662265","display_name":"China Mobile (China)","ror":"https://ror.org/05gftfe97","country_code":"CN","type":"company","lineage":["https://openalex.org/I180662265"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weidong Liu","raw_affiliation_strings":["China Mobile Research Institute, Beijing, China"],"affiliations":[{"raw_affiliation_string":"China Mobile Research Institute, Beijing, China","institution_ids":["https://openalex.org/I180662265"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jianfeng Zhang","orcid":"https://orcid.org/0009-0007-5813-0157"},"institutions":[{"id":"https://openalex.org/I4210096250","display_name":"Beijing Institute of Big Data Research","ror":"https://ror.org/00s1sz824","country_code":"CN","type":"facility","lineage":["https://openalex.org/I20231570","https://openalex.org/I37796252","https://openalex.org/I4210096250"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jianfeng Zhang","raw_affiliation_strings":["Beijing Big Data Centre, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing Big Data Centre, Beijing, China","institution_ids":["https://openalex.org/I4210096250"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109205380","display_name":"Donghui Gao","orcid":null},"institutions":[{"id":"https://openalex.org/I180662265","display_name":"China Mobile (China)","ror":"https://ror.org/05gftfe97","country_code":"CN","type":"company","lineage":["https://openalex.org/I180662265"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Donghui Gao","raw_affiliation_strings":["China Mobile Research Institute, Beijing, China"],"affiliations":[{"raw_affiliation_string":"China Mobile Research Institute, Beijing, China","institution_ids":["https://openalex.org/I180662265"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5012140948","display_name":"Xiaofeng Jia","orcid":"https://orcid.org/0000-0003-3159-2785"},"institutions":[{"id":"https://openalex.org/I4210096250","display_name":"Beijing Institute of Big Data Research","ror":"https://ror.org/00s1sz824","country_code":"CN","type":"facility","lineage":["https://openalex.org/I20231570","https://openalex.org/I37796252","https://openalex.org/I4210096250"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaofeng Jia","raw_affiliation_strings":["Beijing Big Data Centre, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing Big Data Centre, Beijing, China","institution_ids":["https://openalex.org/I4210096250"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5000176042"],"corresponding_institution_ids":["https://openalex.org/I180662265"],"apc_list":null,"apc_paid":null,"fwci":2.0068,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.83167297,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"16","issue":"6","first_page":"1","last_page":"24"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9150999784469604,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9150999784469604,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10703","display_name":"Business Process Modeling and Analysis","score":0.9025999903678894,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8751909732818604},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.8281382322311401},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.6484128832817078},{"id":"https://openalex.org/keywords/government","display_name":"Government (linguistics)","score":0.594329833984375},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5027623176574707},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4581765830516815},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.39646244049072266},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.1124124825000763}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8751909732818604},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.8281382322311401},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.6484128832817078},{"id":"https://openalex.org/C2778137410","wikidata":"https://www.wikidata.org/wiki/Q2732820","display_name":"Government (linguistics)","level":2,"score":0.594329833984375},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5027623176574707},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4581765830516815},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.39646244049072266},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.1124124825000763},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3716854","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3716854","pdf_url":null,"source":{"id":"https://openalex.org/S2492086750","display_name":"ACM Transactions on Intelligent Systems and Technology","issn_l":"2157-6904","issn":["2157-6904","2157-6912"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Intelligent Systems and Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G242403920","display_name":null,"funder_award_id":"62436010 and U23B2052","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W2914665148","https://openalex.org/W2969289419","https://openalex.org/W2979699288","https://openalex.org/W3016154458","https://openalex.org/W3035322756","https://openalex.org/W3131580611","https://openalex.org/W3139363371","https://openalex.org/W3187502111","https://openalex.org/W4212774244","https://openalex.org/W4390590960","https://openalex.org/W6854692045"],"related_works":["https://openalex.org/W2378211422","https://openalex.org/W4321353415","https://openalex.org/W2745001401","https://openalex.org/W2130974462","https://openalex.org/W2028665553","https://openalex.org/W2086519370","https://openalex.org/W4246352526","https://openalex.org/W2121910908","https://openalex.org/W915438175","https://openalex.org/W1984597391"],"abstract_inverted_index":{"The":[0],"rapid":[1],"evolution":[2],"of":[3,14,50,112,128,226],"AI":[4],"has":[5],"driven":[6],"advancements":[7],"across":[8,87],"numerous":[9],"sectors.":[10],"In":[11],"the":[12,48,93,109,116,120,221,229],"domain":[13],"government":[15,39,77,113,184,230],"affairs,":[16],"large":[17],"language":[18],"models":[19,138,190],"(LLMs)":[20],"hold":[21],"significant":[22],"potential":[23],"for":[24,74,217],"applications":[25],"such":[26],"as":[27],"policy":[28],"analysis,":[29],"data":[30,45],"processing,":[31],"and":[32,54,60,119,142,163,175,203,215,223],"decision":[33],"support.":[34],"However,":[35],"their":[36,208],"adoption":[37],"in":[38,76,183,228],"settings":[40],"faces":[41],"considerable":[42],"challenges,":[43,65],"including":[44,169],"accessibility":[46],"issues,":[47],"absence":[49],"standardized":[51],"evaluation":[52,70,127],"criteria,":[53],"concerns":[55,166],"regarding":[56],"model":[57],"accuracy,":[58,154],"reliability,":[59,143],"security.":[61],"To":[62],"address":[63],"these":[64],"we":[66,91,123],"propose":[67],"a":[68,102],"comprehensive":[69],"framework":[71,84,118],"specifically":[72,105],"designed":[73],"LLMs":[75,227],"affairs.":[78],"Built":[79],"on":[80],"modular":[81],"principles,":[82],"this":[83],"ensures":[85],"adaptability":[86],"various":[88],"industries.":[89],"Additionally,":[90],"introduce":[92],"Multi-Scenario":[94],"Government":[95],"Affairs":[96],"Benchmark":[97],"(MSGABench":[98],"1":[99],")":[100],"dataset,":[101,122],"Chinese-language":[103],"dataset":[104],"crafted":[106],"to":[107,197,220],"meet":[108],"practical":[110],"needs":[111],"professionals.":[114],"Employing":[115],"proposed":[117],"MSGA":[121],"conducted":[124],"an":[125],"empirical":[126],"15":[129],"prominent":[130],"LLMs,":[131],"revealing":[132],"critical":[133],"insights:":[134],"(1)":[135],"Performance:":[136],"Many":[137],"demonstrated":[139],"low":[140],"accuracy":[141],"particularly":[144],"under":[145],"minor":[146],"input":[147],"variations,":[148],"with":[149],"some":[150],"dropping":[151],"below":[152],"35%":[153],"whereas":[155],"GPT-4":[156],"achieved":[157],"above":[158],"95%":[159],"reliability;":[160],"(2)":[161],"Security":[162],"Compliance:":[164],"Significant":[165],"were":[167],"identified,":[168],"privacy":[170],"vulnerabilities,":[171],"legal":[172],"compliance":[173],"risks,":[174],"persistent":[176],"biases,":[177],"which":[178,206],"may":[179],"hinder":[180],"secure":[181],"deployments":[182],"contexts;":[185],"(3)":[186],"Task":[187],"Avoidance:":[188],"Certain":[189],"exhibited":[191],"excessive":[192],"caution,":[193],"often":[194],"avoiding":[195],"responses":[196],"basic":[198],"tasks":[199],"like":[200],"document":[201],"classification":[202],"government-related":[204],"inquiries,":[205],"restricts":[207],"usability.":[209],"These":[210],"findings":[211],"highlight":[212],"essential":[213],"limitations":[214],"opportunities":[216],"improvement,":[218],"contributing":[219],"safe":[222],"effective":[224],"application":[225],"sector.":[231]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
