{"id":"https://openalex.org/W4412889383","doi":"https://doi.org/10.18653/v1/2025.acl-srw.34","title":"StRuCom: A Novel Dataset of Structured Code Comments in Russian","display_name":"StRuCom: A Novel Dataset of Structured Code Comments in Russian","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4412889383","doi":"https://doi.org/10.18653/v1/2025.acl-srw.34"},"language":"en","primary_location":{"id":"doi:10.18653/v1/2025.acl-srw.34","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.acl-srw.34","pdf_url":"https://aclanthology.org/2025.acl-srw.34.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.acl-srw.34.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5108300703","display_name":"Maria Dziuba","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Maria Dziuba","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5006755109","display_name":"Valentin Malykh","orcid":"https://orcid.org/0000-0002-4508-2527"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Valentin Malykh","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5108300703"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.10009891,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"517","last_page":"527"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9839000105857849,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7294700741767883},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5913052558898926},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.5237604975700378},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.40962618589401245},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3713489770889282},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.33957478404045105}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7294700741767883},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5913052558898926},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.5237604975700378},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.40962618589401245},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3713489770889282},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33957478404045105},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.acl-srw.34","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.acl-srw.34","pdf_url":"https://aclanthology.org/2025.acl-srw.34.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop)","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.acl-srw.34","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.acl-srw.34","pdf_url":"https://aclanthology.org/2025.acl-srw.34.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 4: Student Research Workshop)","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4412889383.pdf","grobid_xml":"https://content.openalex.org/works/W4412889383.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4231937131","https://openalex.org/W3188962172","https://openalex.org/W323219885","https://openalex.org/W2063928587","https://openalex.org/W2772917594","https://openalex.org/W1487966966","https://openalex.org/W4312825515","https://openalex.org/W1589342014","https://openalex.org/W4306742369","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Structured":[0],"code":[1,9,44],"comments":[2,64],"in":[3],"docstring":[4,59],"format":[5],"are":[6],"essential":[7],"for":[8,18,23,42],"comprehension":[10],"and":[11,58,80],"maintenance,":[12],"but":[13],"existing":[14],"machine":[15],"learning":[16],"models":[17],"their":[19],"generation":[20],"perform":[21],"poorly":[22],"Russian":[24,43,66],"compared":[25],"to":[26],"English.To":[27],"bridge":[28],"this":[29],"gap,":[30],"we":[31],"present":[32],"StRuCom":[33,61],"-the":[34],"first":[35],"large-scale":[36],"dataset":[37],"(153K":[38],"examples)":[39],"specifically":[40],"designed":[41],"documentation.Unlike":[45],"machine-translated":[46],"English":[47],"datasets":[48],"that":[49],"distort":[50],"terminology":[51],"(e.g.,":[52],"technical":[53],"loanwords":[54],"vs.":[55],"literal":[56],"translations)":[57],"structures,":[60],"combines":[62],"human-written":[63],"from":[65],"GitHub":[67],"repositories":[68],"with":[69,75],"synthetically":[70],"generated":[71],"ones,":[72],"ensuring":[73],"compliance":[74],"Python,":[76],"Java,":[77],"JavaScript,":[78],"C#,":[79],"Go":[81],"standards":[82],"through":[83],"automated":[84],"validation.":[85]},"counts_by_year":[],"updated_date":"2026-03-11T06:11:40.159057","created_date":"2025-10-10T00:00:00"}
