{"id":"https://openalex.org/W4402671984","doi":"https://doi.org/10.18653/v1/2024.acl-long.676","title":"On the Representational Capacity of Neural Language Models with Chain-of-Thought Reasoning","display_name":"On the Representational Capacity of Neural Language Models with Chain-of-Thought Reasoning","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4402671984","doi":"https://doi.org/10.18653/v1/2024.acl-long.676"},"language":"en","primary_location":{"id":"doi:10.18653/v1/2024.acl-long.676","is_oa":true,"landing_page_url":"http://dx.doi.org/10.18653/v1/2024.acl-long.676","pdf_url":"https://aclanthology.org/2024.acl-long.676.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2024.acl-long.676.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102590192","display_name":"Franz Nowak","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Franz Nowak","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071101707","display_name":"Anej Svete","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Anej Svete","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003361309","display_name":"Alexandra Butoi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alexandra Butoi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5061951606","display_name":"Ryan Cotterell","orcid":"https://orcid.org/0000-0003-4080-1833"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ryan Cotterell","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5102590192"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.3595,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.67074256,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"12510","last_page":"12548"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9383000135421753,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9383000135421753,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6355394721031189},{"id":"https://openalex.org/keywords/cognitive-science","display_name":"Cognitive science","score":0.525281548500061},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.47129857540130615},{"id":"https://openalex.org/keywords/chain","display_name":"Chain (unit)","score":0.46013206243515015},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4483717978000641},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.34941935539245605},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.2080990970134735}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6355394721031189},{"id":"https://openalex.org/C188147891","wikidata":"https://www.wikidata.org/wiki/Q147638","display_name":"Cognitive science","level":1,"score":0.525281548500061},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.47129857540130615},{"id":"https://openalex.org/C199185054","wikidata":"https://www.wikidata.org/wiki/Q552299","display_name":"Chain (unit)","level":2,"score":0.46013206243515015},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4483717978000641},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.34941935539245605},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.2080990970134735},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C1276947","wikidata":"https://www.wikidata.org/wiki/Q333","display_name":"Astronomy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2024.acl-long.676","is_oa":true,"landing_page_url":"http://dx.doi.org/10.18653/v1/2024.acl-long.676","pdf_url":"https://aclanthology.org/2024.acl-long.676.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2024.acl-long.676","is_oa":true,"landing_page_url":"http://dx.doi.org/10.18653/v1/2024.acl-long.676","pdf_url":"https://aclanthology.org/2024.acl-long.676.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.44999998807907104}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4402671984.pdf","grobid_xml":"https://content.openalex.org/works/W4402671984.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2169518243","https://openalex.org/W2742360428","https://openalex.org/W2060570504","https://openalex.org/W4320830217","https://openalex.org/W3188962172","https://openalex.org/W2772917594","https://openalex.org/W4306742369","https://openalex.org/W4303457083","https://openalex.org/W2131146434","https://openalex.org/W3204019825"],"abstract_inverted_index":{"The":[0],"performance":[1],"of":[2,17,94,110],"modern":[3],"language":[4,68],"models":[5],"(LMs)":[6],"has":[7],"been":[8],"improved":[9],"by":[10],"chain-of-thought":[11],"(CoT)":[12],"reasoning,":[13,101],"i.e.,":[14],"the":[15,23,91,107,123,130],"process":[16],"generating":[18],"intermediate":[19],"results":[20,89],"that":[21,35,103],"guide":[22],"model":[24],"towards":[25],"a":[26,63,84],"final":[27],"answer.A":[28],"possible":[29],"explanation":[30],"for":[31],"this":[32,77],"improvement":[33],"is":[34,137],"CoT":[36,81,100,127,133,143],"reasoning":[37,82,128,144],"extends":[38],"an":[39],"LM's":[40],"computational":[41],"power,":[42],"as":[43,114,135],"RNNs":[44],"and":[45,96,146],"transformers":[46],"with":[47,99],"additional":[48],"scratch":[49],"space":[50],"are":[51],"known":[52],"to":[53,58,141],"be":[54],"Turing":[55,59,116],"complete.Comparing":[56],"LMs":[57,71,98],"machines,":[60],"however,":[61],"introduces":[62],"category":[64],"error-Turing":[65],"machines":[66],"decide":[67],"membership,":[69],"whereas":[70],"define":[72],"distributions":[73,111],"over":[74,112,129],"strings.To":[75],"bridge":[76],"gap,":[78],"we":[79],"formalize":[80],"in":[83],"probabilistic":[85,115],"setting.We":[86],"present":[87],"several":[88],"on":[90],"representational":[92],"capacity":[93],"recurrent":[95],"transformer":[97],"showing":[102],"they":[104],"can":[105],"represent":[106],"same":[108],"family":[109],"strings":[113],"machines.https://github.com/rycolab/cot-lms":[117],"*Equal":[118],"contribution.":[119],"1":[120],"We":[121],"use":[122],"more":[124],"general":[125],"term":[126,132],"original":[131],"prompting":[134,136],"just":[138],"one":[139],"way":[140],"elicit":[142],"(Wang":[145],"Zhou,":[147],"2024).":[148]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-12T06:13:28.667946","created_date":"2025-10-10T00:00:00"}
