{"id":"https://openalex.org/W4412945485","doi":"https://doi.org/10.18653/v1/2025.acl-long.438","title":"CypherBench: Towards Precise Retrieval over Full-scale Modern Knowledge Graphs in the LLM Era","display_name":"CypherBench: Towards Precise Retrieval over Full-scale Modern Knowledge Graphs in the LLM Era","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4412945485","doi":"https://doi.org/10.18653/v1/2025.acl-long.438"},"language":"en","primary_location":{"id":"doi:10.18653/v1/2025.acl-long.438","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.acl-long.438","pdf_url":"https://aclanthology.org/2025.acl-long.438.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.acl-long.438.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5045853966","display_name":"Yanlin Feng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yanlin Feng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5099161595","display_name":"Simone Papicchio","orcid":"https://orcid.org/0009-0005-5361-0042"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Simone Papicchio","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5040390469","display_name":"Sajjadur Rahman","orcid":"https://orcid.org/0000-0003-4210-1582"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sajjadur Rahman","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":8.681,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.97418983,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"8934","last_page":"8958"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.974399983882904,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.974399983882904,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9610999822616577,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11063","display_name":"Rough Sets and Fuzzy Logic","score":0.9277999997138977,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/knowledge-graph","display_name":"Knowledge graph","score":0.6806157827377319},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6757292747497559},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.49825477600097656},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4557674527168274},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.340853214263916},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.33586907386779785},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.06764090061187744},{"id":"https://openalex.org/keywords/cartography","display_name":"Cartography","score":0.06566429138183594}],"concepts":[{"id":"https://openalex.org/C2987255567","wikidata":"https://www.wikidata.org/wiki/Q33002955","display_name":"Knowledge graph","level":2,"score":0.6806157827377319},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6757292747497559},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.49825477600097656},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4557674527168274},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.340853214263916},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33586907386779785},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.06764090061187744},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.06566429138183594}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.acl-long.438","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.acl-long.438","pdf_url":"https://aclanthology.org/2025.acl-long.438.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.acl-long.438","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.acl-long.438","pdf_url":"https://aclanthology.org/2025.acl-long.438.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4412945485.pdf","grobid_xml":"https://content.openalex.org/works/W4412945485.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W3188962172","https://openalex.org/W2772917594","https://openalex.org/W4312825515","https://openalex.org/W4306742369","https://openalex.org/W4303457083","https://openalex.org/W2131146434","https://openalex.org/W2951359407","https://openalex.org/W4376623224","https://openalex.org/W4387849428","https://openalex.org/W2965396565"],"abstract_inverted_index":{"Retrieval":[0],"from":[1,60],"graph":[2,118,126,172],"data":[3],"is":[4,22],"crucial":[5],"for":[6,58,87,179],"augmenting":[7],"large":[8,92],"language":[9],"models":[10],"(LLM)":[11],"with":[12,147,153],"both":[13],"open-domain":[14],"knowledge":[15,40,43,63,79],"and":[16,20,42,52,74,109,141,157,183],"private":[17],"enterprise":[18],"data,":[19],"it":[21],"also":[23],"a":[24,113,176],"key":[25,166],"component":[26],"in":[27],"the":[28,71,97,123,144],"recent":[29],"GraphRAG":[30],"system":[31],"(Edge":[32],"et":[33],"al.,":[34],"2024).Despite":[35],"decades":[36],"of":[37,103,111,122],"research":[38],"on":[39,120,139],"graphs":[41,64,80,152,188],"base":[44],"question":[45],"answering,":[46],"leading":[47],"LLM":[48,99],"frameworks":[49],"(e.g.,":[50,81],"Langchain":[51],"LlamaIndex)":[53],"have":[54],"only":[55],"minimal":[56],"support":[57],"retrieval":[59],"modern":[61,77],"encyclopedic":[62],"like":[65],"Wikidata.In":[66],"this":[67,137],"paper,":[68],"we":[69,115,163],"analyze":[70],"root":[72],"cause":[73],"suggest":[75],"that":[76,94,127],"RDF":[78,125],"Wikidata,":[82],"Freebase)":[83],"are":[84],"less":[85],"efficient":[86],"LLMs":[88,133],"due":[89],"to":[90],"overly":[91],"schemas":[93],"far":[95],"exceed":[96],"typical":[98],"context":[100],"window,":[101],"use":[102],"resource":[104],"identifiers,":[105],"overlapping":[106],"relation":[107],"types":[108],"lack":[110],"normalization.As":[112],"solution,":[114],"propose":[116],"property":[117,151],"views":[119],"top":[121],"underlying":[124],"can":[128],"be":[129],"efficiently":[130],"queried":[131],"by":[132],"using":[134],"Cypher.We":[135],"instantiated":[136],"idea":[138],"Wikidata":[140],"introduced":[142],"CypherBench,":[143],"first":[145],"benchmark":[146],"11":[148],"large-scale,":[149],"multi-domain":[150],"7.8":[154],"million":[155],"entities":[156],"over":[158],"10,000":[159],"questions.To":[160],"achieve":[161],"this,":[162],"tackled":[164],"several":[165],"challenges,":[167],"including":[168],"developing":[169],"an":[170],"RDF-to-property":[171],"conversion":[173],"engine,":[174],"creating":[175],"systematic":[177],"pipeline":[178],"textto-Cypher":[180],"task":[181],"generation,":[182],"designing":[184],"new":[185],"evaluation":[186],"metrics.RDF":[187],"Schema":[189],"Datatype":[190],"conversion,":[191],"unit":[192],"standardiza8on,":[193],"etc.":[194]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":3}],"updated_date":"2026-06-13T07:54:00.901334","created_date":"2025-10-10T00:00:00"}
