{"id":"https://openalex.org/W4296549281","doi":"https://doi.org/10.48550/arxiv.2209.08372","title":"CodeQueries: A Dataset of Semantic Queries over Code","display_name":"CodeQueries: A Dataset of Semantic Queries over Code","publication_year":2022,"publication_date":"2022-09-17","ids":{"openalex":"https://openalex.org/W4296549281","doi":"https://doi.org/10.48550/arxiv.2209.08372"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2209.08372","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2209.08372","pdf_url":"https://arxiv.org/pdf/2209.08372","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2209.08372","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5079862927","display_name":"Surya Prakash Sahu","orcid":"https://orcid.org/0009-0003-9943-5222"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Sahu, Surya Prakash","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076870662","display_name":"Madhurima Mandal","orcid":"https://orcid.org/0000-0003-3084-3757"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mandal, Madhurima","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067162151","display_name":"Shikhar Bharadwaj","orcid":"https://orcid.org/0009-0003-7202-0502"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bharadwaj, Shikhar","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107245208","display_name":"Aditya Kanade","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kanade, Aditya","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102725147","display_name":"Petros Maniatis","orcid":"https://orcid.org/0000-0003-3777-5291"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Maniatis, Petros","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5026126465","display_name":"Shirish Shevade","orcid":"https://orcid.org/0009-0009-7202-6860"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shevade, Shirish","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5079862927"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9944000244140625,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.991100013256073,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8237485885620117},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.5684075951576233},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5369349122047424},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5099838972091675},{"id":"https://openalex.org/keywords/python","display_name":"Python (programming language)","score":0.4728914797306061},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4579640030860901},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.4430798292160034},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.41697239875793457},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4006541073322296}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8237485885620117},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.5684075951576233},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5369349122047424},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5099838972091675},{"id":"https://openalex.org/C519991488","wikidata":"https://www.wikidata.org/wiki/Q28865","display_name":"Python (programming language)","level":2,"score":0.4728914797306061},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4579640030860901},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.4430798292160034},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.41697239875793457},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4006541073322296},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2209.08372","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2209.08372","pdf_url":"https://arxiv.org/pdf/2209.08372","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2209.08372","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2209.08372","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2209.08372","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2209.08372","pdf_url":"https://arxiv.org/pdf/2209.08372","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2341492732","https://openalex.org/W3187193180","https://openalex.org/W106542691","https://openalex.org/W4287027380","https://openalex.org/W1699080303","https://openalex.org/W4297799326","https://openalex.org/W2384605597","https://openalex.org/W2207495067","https://openalex.org/W1906486629","https://openalex.org/W2788308474"],"abstract_inverted_index":{"Developers":[0],"often":[1],"have":[2],"questions":[3,80],"about":[4,107],"semantic":[5,92],"aspects":[6],"of":[7,37,55,66,91,154,177,209],"code":[8,29,47,76,108,119,214],"they":[9],"are":[10,106,118],"working":[11],"on,":[12],"e.g.,":[13],"\"Is":[14],"there":[15],"a":[16,22,43,86,130,164,175,182,202],"class":[17],"whose":[18],"parent":[19],"classes":[20],"declare":[21],"conflicting":[23,68],"attribute?\".":[24],"Answering":[25],"them":[26],"requires":[27],"understanding":[28],"semantics":[30],"such":[31,42],"as":[32,58,60],"attributes":[33],"and":[34,115,136,140,143,147,171],"inheritance":[35],"relation":[36],"classes.":[38],"An":[39],"answer":[40,51],"to":[41,98,205,212],"question":[44],"should":[45],"identify":[46],"spans":[48],"constituting":[49],"the":[50,53,56,64,67,99,104,110,116,123,152,207,217],"(e.g.,":[52,63],"declaration":[54],"subclass)":[57],"well":[59],"supporting":[61],"facts":[62],"definitions":[65],"attributes).":[69],"The":[70],"existing":[71,100],"work":[72],"on":[73,126,174,197],"question-answering":[74,219],"over":[75,94],"has":[77],"considered":[78],"yes/no":[79],"or":[81],"method-level":[82],"context.":[83],"We":[84,121,162,179,189],"contribute":[85],"labeled":[87],"dataset,":[88,156],"called":[89],"CodeQueries,":[90,103],"queries":[93,105,127,144],"Python":[95],"code.":[96],"Compared":[97],"datasets,":[101],"in":[102,169,216],"semantics,":[109,215],"context":[111],"is":[112,200],"file":[113],"level":[114],"answers":[117],"spans.":[120],"curate":[122],"dataset":[124,204],"based":[125],"supported":[128],"by":[129],"widely-used":[131],"static":[132],"analysis":[133],"tool,":[134],"CodeQL,":[135],"include":[137],"both":[138],"positive":[139],"negative":[141],"examples,":[142],"requiring":[145],"single-hop":[146],"multi-hop":[148],"reasoning.":[149],"To":[150],"assess":[151],"value":[153],"our":[155],"we":[157],"evaluate":[158,181],"baseline":[159],"neural":[160,210],"approaches.":[161],"study":[163],"large":[165],"language":[166],"model":[167,185],"(GPT3.5-Turbo)":[168],"zero-shot":[170],"few-shot":[172],"settings":[173],"subset":[176],"CodeQueries.":[178,198],"also":[180],"BERT":[183],"style":[184],"(CuBERT)":[186],"with":[187],"fine-tuning.":[188],"find":[190],"that":[191],"these":[192],"models":[193],"achieve":[194],"limited":[195],"success":[196],"CodeQueries":[199],"thus":[201],"challenging":[203],"test":[206],"ability":[208],"models,":[211],"understand":[213],"extractive":[218],"setting.":[220]},"counts_by_year":[],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
