{"id":"https://openalex.org/W4224874866","doi":"https://doi.org/10.1162/tacl_a_00490","title":"Formal Language Recognition by Hard Attention Transformers: Perspectives from Circuit Complexity","display_name":"Formal Language Recognition by Hard Attention Transformers: Perspectives from Circuit Complexity","publication_year":2022,"publication_date":"2022-01-01","ids":{"openalex":"https://openalex.org/W4224874866","doi":"https://doi.org/10.1162/tacl_a_00490"},"language":"en","primary_location":{"id":"doi:10.1162/tacl_a_00490","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00490","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00490/2037124/tacl_a_00490.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00490/2037124/tacl_a_00490.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5083460017","display_name":"Yiding Hao","orcid":null},"institutions":[{"id":"https://openalex.org/I32971472","display_name":"Yale University","ror":"https://ror.org/03v76x132","country_code":"US","type":"education","lineage":["https://openalex.org/I32971472"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yiding Hao","raw_affiliation_strings":["Yale University, New Haven, CT, USA. yiding.hao@yale.edu"],"affiliations":[{"raw_affiliation_string":"Yale University, New Haven, CT, USA. yiding.hao@yale.edu","institution_ids":["https://openalex.org/I32971472"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022630792","display_name":"Dana Angluin","orcid":"https://orcid.org/0000-0002-6907-2999"},"institutions":[{"id":"https://openalex.org/I32971472","display_name":"Yale University","ror":"https://ror.org/03v76x132","country_code":"US","type":"education","lineage":["https://openalex.org/I32971472"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Dana Angluin","raw_affiliation_strings":["Yale University, New Haven, CT, USA. dana.angluin@yale.edu"],"affiliations":[{"raw_affiliation_string":"Yale University, New Haven, CT, USA. dana.angluin@yale.edu","institution_ids":["https://openalex.org/I32971472"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5071457812","display_name":"Robert Frank","orcid":"https://orcid.org/0000-0001-9253-1658"},"institutions":[{"id":"https://openalex.org/I32971472","display_name":"Yale University","ror":"https://ror.org/03v76x132","country_code":"US","type":"education","lineage":["https://openalex.org/I32971472"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Robert Frank","raw_affiliation_strings":["Yale University, New Haven, CT, USA. robert.frank@yale.edu"],"affiliations":[{"raw_affiliation_string":"Yale University, New Haven, CT, USA. robert.frank@yale.edu","institution_ids":["https://openalex.org/I32971472"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5022630792","https://openalex.org/A5071457812","https://openalex.org/A5083460017"],"corresponding_institution_ids":["https://openalex.org/I32971472"],"apc_list":null,"apc_paid":null,"fwci":1.8297,"has_fulltext":true,"cited_by_count":20,"citation_normalized_percentile":{"value":0.84738966,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"10","issue":null,"first_page":"800","last_page":"810"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.987500011920929,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7580603361129761},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.557869553565979},{"id":"https://openalex.org/keywords/formal-language","display_name":"Formal language","score":0.5130125880241394},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.4833900034427643},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.43295711278915405},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3244318664073944}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7580603361129761},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.557869553565979},{"id":"https://openalex.org/C146072743","wikidata":"https://www.wikidata.org/wiki/Q192161","display_name":"Formal language","level":2,"score":0.5130125880241394},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.4833900034427643},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.43295711278915405},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3244318664073944},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1162/tacl_a_00490","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00490","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00490/2037124/tacl_a_00490.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:2d6e3a021ef24a47b04cab72661d6411","is_oa":true,"landing_page_url":"https://doaj.org/article/2d6e3a021ef24a47b04cab72661d6411","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Transactions of the Association for Computational Linguistics, Vol 10 (2022)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1162/tacl_a_00490","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00490","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00490/2037124/tacl_a_00490.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.6700000166893005,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4224874866.pdf","grobid_xml":"https://content.openalex.org/works/W4224874866.grobid-xml"},"referenced_works_count":13,"referenced_works":["https://openalex.org/W1902237438","https://openalex.org/W2060270693","https://openalex.org/W2612690371","https://openalex.org/W2908802752","https://openalex.org/W2952744660","https://openalex.org/W3014096773","https://openalex.org/W3098666169","https://openalex.org/W3175990774","https://openalex.org/W4290994975","https://openalex.org/W4298227433","https://openalex.org/W6739901393","https://openalex.org/W6771550025","https://openalex.org/W6840465419"],"related_works":["https://openalex.org/W2275988210","https://openalex.org/W2385621972","https://openalex.org/W2589098947","https://openalex.org/W4231964008","https://openalex.org/W2771022762","https://openalex.org/W2014589784","https://openalex.org/W2807289511","https://openalex.org/W2386767533","https://openalex.org/W3157910026","https://openalex.org/W2547835662"],"abstract_inverted_index":{"Abstract":[0],"This":[1,73],"paper":[2],"analyzes":[3],"three":[4],"formal":[5,50],"models":[6],"of":[7,15,59,64,67],"Transformer":[8],"encoders":[9],"that":[10,38,80,115,120],"differ":[11],"in":[12,52],"the":[13,53,57,84,88,103],"form":[14],"their":[16],"self-attention":[17],"mechanism:":[18],"unique":[19,24],"hard":[20,25,33],"attention":[21,26,34],"(UHAT);":[22],"generalized":[23],"(GUHAT),":[27],"which":[28],"generalizes":[29],"UHAT;":[30],"and":[31,40,70,107,122],"averaging":[32],"(AHAT).":[35],"We":[36],"show":[37],"UHAT":[39,121],"GUHAT":[41,81,123],"Transformers,":[42],"viewed":[43],"as":[44],"string":[45],"acceptors,":[46],"can":[47,117],"only":[48],"recognize":[49,83,118],"languages":[51,60,86,93,105,119],"complexity":[54],"class":[55,58],"AC0,":[56],"recognizable":[61,110],"by":[62,111],"families":[63],"Boolean":[65],"circuits":[66],"constant":[68],"depth":[69],"polynomial":[71],"size.":[72],"upper":[74],"bound":[75],"subsumes":[76],"Hahn\u2019s":[77],"(2020)":[78],"results":[79],"cannot":[82],"DYCK":[85],"or":[87],"PARITY":[89],"language,":[90],"since":[91],"those":[92],"are":[94,109],"outside":[95],"AC0":[96],"(Furst":[97],"et":[98],"al.,":[99],"1984).":[100],"In":[101],"contrast,":[102],"non-AC0":[104],"MAJORITY":[106],"DYCK-1":[108],"AHAT":[112,116],"networks,":[113],"implying":[114],"cannot.":[124]},"counts_by_year":[{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
