{"id":"https://openalex.org/W4403116247","doi":"https://doi.org/10.48550/arxiv.2410.01537","title":"Attention layers provably solve single-location regression","display_name":"Attention layers provably solve single-location regression","publication_year":2024,"publication_date":"2024-10-02","ids":{"openalex":"https://openalex.org/W4403116247","doi":"https://doi.org/10.48550/arxiv.2410.01537"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2410.01537","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.01537","pdf_url":"https://arxiv.org/pdf/2410.01537","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2410.01537","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5107174740","display_name":"Pierre Marion","orcid":null},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]}],"countries":["CH"],"is_corresponding":true,"raw_author_name":"Marion, Pierre","raw_affiliation_strings":["D\u00e9partement de Math\u00e9matiques - EPFL (Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne D\u00e9partement de Math\u00e9matiques 1015 Lausanne - Switzerland)"],"affiliations":[{"raw_affiliation_string":"D\u00e9partement de Math\u00e9matiques - EPFL (Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne D\u00e9partement de Math\u00e9matiques 1015 Lausanne - Switzerland)","institution_ids":["https://openalex.org/I5124864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026477644","display_name":"Rapha\u00ebl Berthier","orcid":null},"institutions":[{"id":"https://openalex.org/I1326498283","display_name":"Institut national de recherche en sciences et technologies du num\u00e9rique","ror":"https://ror.org/02kvxyf05","country_code":"FR","type":"government","lineage":["https://openalex.org/I1326498283"]},{"id":"https://openalex.org/I39804081","display_name":"Sorbonne Universit\u00e9","ror":"https://ror.org/02en5vm52","country_code":"FR","type":"education","lineage":["https://openalex.org/I39804081"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Berthier, Rapha\u00ebl","raw_affiliation_strings":["SU - Sorbonne Universite\u0301 (21 rue de l\u2019\u00c9cole de m\u00e9decine - 75006 Paris - France)","Centre Inria de Paris (48 Rue Barrault, 75013 Paris - France)"],"affiliations":[{"raw_affiliation_string":"SU - Sorbonne Universite\u0301 (21 rue de l\u2019\u00c9cole de m\u00e9decine - 75006 Paris - France)","institution_ids":["https://openalex.org/I39804081"]},{"raw_affiliation_string":"Centre Inria de Paris (48 Rue Barrault, 75013 Paris - France)","institution_ids":["https://openalex.org/I1326498283"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106038334","display_name":"G\u00e9rard Biau","orcid":"https://orcid.org/0000-0001-8238-4471"},"institutions":[{"id":"https://openalex.org/I4210147464","display_name":"Laboratoire de Probabilit\u00e9s et Mod\u00e8les Al\u00e9atoires","ror":"https://ror.org/04hjc7403","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I204730241","https://openalex.org/I39804081","https://openalex.org/I4210141950","https://openalex.org/I4210147464"]},{"id":"https://openalex.org/I4387155306","display_name":"Laboratoire de Probabilit\u00e9s, Statistique et Mod\u00e9lisation","ror":"https://ror.org/02vnd0e65","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I204730241","https://openalex.org/I39804081","https://openalex.org/I4387155306"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Biau, G\u00e9rard","raw_affiliation_strings":["LPSM (UMR_8001) - Laboratoire de Probabilit\u00e9s, Statistique et Mod\u00e9lisation (Campus Jussieu Tour 16-26, 1er \u00e9tage 4, Place Jussieu 75005 Paris / \r\n\r\nB\u00e2timent Sophie Germain 5\u00e8me \u00e9tage Avenue de France 75013 Paris - France)"],"affiliations":[{"raw_affiliation_string":"LPSM (UMR_8001) - Laboratoire de Probabilit\u00e9s, Statistique et Mod\u00e9lisation (Campus Jussieu Tour 16-26, 1er \u00e9tage 4, Place Jussieu 75005 Paris / \r\n\r\nB\u00e2timent Sophie Germain 5\u00e8me \u00e9tage Avenue de France 75013 Paris - France)","institution_ids":["https://openalex.org/I4210147464","https://openalex.org/I4387155306"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5010301554","display_name":"Claire Boyer","orcid":null},"institutions":[{"id":"https://openalex.org/I3019441195","display_name":"Laboratoire de Math\u00e9matiques d'Orsay","ror":"https://ror.org/03ab0zs98","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I277688954","https://openalex.org/I3019441195"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Boyer, Claire","raw_affiliation_strings":["LMO - Laboratoire de Math\u00e9matiques d'Orsay (B\u00e2timent 307, 91405, Orsay cedex - France)"],"affiliations":[{"raw_affiliation_string":"LMO - Laboratoire de Math\u00e9matiques d'Orsay (B\u00e2timent 307, 91405, Orsay cedex - France)","institution_ids":["https://openalex.org/I3019441195"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5107174740"],"corresponding_institution_ids":["https://openalex.org/I5124864"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10326","display_name":"Indoor and Outdoor Localization Technologies","score":0.9901000261306763,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10326","display_name":"Indoor and Outdoor Localization Technologies","score":0.9901000261306763,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10500","display_name":"Sparse and Compressive Sensing Techniques","score":0.9883000254631042,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12676","display_name":"Machine Learning and ELM","score":0.9868000149726868,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5657659769058228},{"id":"https://openalex.org/keywords/regression","display_name":"Regression","score":0.5297447443008423},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.2839764356613159},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2144942283630371}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5657659769058228},{"id":"https://openalex.org/C83546350","wikidata":"https://www.wikidata.org/wiki/Q1139051","display_name":"Regression","level":2,"score":0.5297447443008423},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.2839764356613159},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2144942283630371}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2410.01537","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.01537","pdf_url":"https://arxiv.org/pdf/2410.01537","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2410.01537","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2410.01537","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2410.01537","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2410.01537","pdf_url":"https://arxiv.org/pdf/2410.01537","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"score":0.5099999904632568,"id":"https://metadata.un.org/sdg/13","display_name":"Climate action"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4403116247.pdf","grobid_xml":"https://content.openalex.org/works/W4403116247.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Attention-based":[0],"models,":[1],"such":[2],"as":[3],"Transformer,":[4],"excel":[5],"across":[6],"various":[7],"tasks":[8],"but":[9],"lack":[10],"a":[11,38,47,53,65,73,77],"comprehensive":[12],"theoretical":[13,84],"understanding,":[14],"especially":[15],"regarding":[16],"token-wise":[17],"sparsity":[18],"and":[19,43,92,126],"internal":[20,127],"linear":[21,54,128],"representations.":[22],"To":[23,59],"address":[24],"this":[25,61],"gap,":[26],"we":[27,63],"introduce":[28],"the":[29,41,57,100,104,106,110,116],"single-location":[30],"regression":[31],"task,":[32,62],"where":[33],"only":[34],"one":[35],"token":[36,124],"in":[37],"sequence":[39],"determines":[40],"output,":[42],"its":[44,83,88,94],"position":[45],"is":[46],"latent":[48],"random":[49],"variable,":[50],"retrievable":[51],"via":[52],"projection":[55],"of":[56,76,103,118],"input.":[58],"solve":[60],"propose":[64],"dedicated":[66],"predictor,":[67],"which":[68],"turns":[69],"out":[70],"to":[71,121],"be":[72],"simplified":[74],"version":[75],"non-linear":[78],"self-attention":[79],"layer.":[80],"We":[81],"study":[82],"properties,":[85],"by":[86],"showing":[87],"asymptotic":[89],"Bayes":[90],"optimality":[91],"analyzing":[93],"training":[95],"dynamics.":[96],"In":[97],"particular,":[98],"despite":[99],"non-convex":[101],"nature":[102],"problem,":[105],"predictor":[107],"effectively":[108],"learns":[109],"underlying":[111],"structure.":[112],"This":[113],"work":[114],"highlights":[115],"capacity":[117],"attention":[119],"mechanisms":[120],"handle":[122],"sparse":[123],"information":[125],"structures.":[129]},"counts_by_year":[],"updated_date":"2026-03-12T08:34:05.389933","created_date":"2025-10-10T00:00:00"}
