{"id":"https://openalex.org/W4408709442","doi":"https://doi.org/10.1109/tnnls.2025.3548047","title":"2-D Transformer: Extending Large Language Models to Long-Context With Few Memory","display_name":"2-D Transformer: Extending Large Language Models to Long-Context With Few Memory","publication_year":2025,"publication_date":"2025-03-21","ids":{"openalex":"https://openalex.org/W4408709442","doi":"https://doi.org/10.1109/tnnls.2025.3548047","pmid":"https://pubmed.ncbi.nlm.nih.gov/40117155"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2025.3548047","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2025.3548047","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Xingyang He","orcid":"https://orcid.org/0009-0008-1202-7784"},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xingyang He","raw_affiliation_strings":["College of Artificial Intelligence, Nankai University, Tianjin, China"],"raw_orcid":"https://orcid.org/0009-0008-1202-7784","affiliations":[{"raw_affiliation_string":"College of Artificial Intelligence, Nankai University, Tianjin, China","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101603878","display_name":"Jie Liu","orcid":"https://orcid.org/0000-0001-5544-8417"},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jie Liu","raw_affiliation_strings":["National Key Laboratory of Intelligent Tracking and Forecasting for Infectious Diseases, Engineering Research Center of Trusted Behavior Intelligence, Ministry of Education, College of Artificial Intelligence, Nankai University, Tianjin, China"],"raw_orcid":"https://orcid.org/0000-0001-5544-8417","affiliations":[{"raw_affiliation_string":"National Key Laboratory of Intelligent Tracking and Forecasting for Infectious Diseases, Engineering Research Center of Trusted Behavior Intelligence, Ministry of Education, College of Artificial Intelligence, Nankai University, Tianjin, China","institution_ids":["https://openalex.org/I205237279"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005318753","display_name":"Yutai Duan","orcid":null},"institutions":[{"id":"https://openalex.org/I205237279","display_name":"Nankai University","ror":"https://ror.org/01y1kjr75","country_code":"CN","type":"education","lineage":["https://openalex.org/I205237279"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yutai Duan","raw_affiliation_strings":["College of Artificial Intelligence, Nankai University, Tianjin, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of Artificial Intelligence, Nankai University, Tianjin, China","institution_ids":["https://openalex.org/I205237279"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I205237279"],"apc_list":null,"apc_paid":null,"fwci":6.5198,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.95820164,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":"36","issue":"8","first_page":"15294","last_page":"15308"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.958899974822998,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.958899974822998,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9445000290870667,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5744297504425049},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.525704562664032},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.11885643005371094},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.11465787887573242}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5744297504425049},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.525704562664032},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.11885643005371094},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.11465787887573242},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2025.3548047","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2025.3548047","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:40117155","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/40117155","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1527415974","display_name":null,"funder_award_id":"24ZXZSSS00420","funder_id":"https://openalex.org/F4320326190","funder_display_name":"Tianjin Science and Technology Committee"},{"id":"https://openalex.org/G2564614147","display_name":null,"funder_award_id":"23YFZXYC00029","funder_id":"https://openalex.org/F4320313610","funder_display_name":"Shanghai Science and Technology Development Foundation"},{"id":"https://openalex.org/G3521512097","display_name":null,"funder_award_id":"62376129","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4515799127","display_name":null,"funder_award_id":"2023YFF0725003","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320313610","display_name":"Shanghai Science and Technology Development Foundation","ror":null},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320326190","display_name":"Tianjin Science and Technology Committee","ror":null},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":50,"referenced_works":["https://openalex.org/W2070246124","https://openalex.org/W2946609015","https://openalex.org/W2963204221","https://openalex.org/W2963339397","https://openalex.org/W2989743967","https://openalex.org/W3101498587","https://openalex.org/W3106298483","https://openalex.org/W3119438769","https://openalex.org/W3170490008","https://openalex.org/W3171639395","https://openalex.org/W4225727438","https://openalex.org/W4385245566","https://openalex.org/W4389318028","https://openalex.org/W4389524599","https://openalex.org/W4400111590","https://openalex.org/W4401042382","https://openalex.org/W4401042914","https://openalex.org/W4404781337","https://openalex.org/W4404781668","https://openalex.org/W4404782964","https://openalex.org/W4412888613","https://openalex.org/W6761628794","https://openalex.org/W6770251742","https://openalex.org/W6776048684","https://openalex.org/W6778883912","https://openalex.org/W6779163297","https://openalex.org/W6781533629","https://openalex.org/W6783944145","https://openalex.org/W6796581206","https://openalex.org/W6797927333","https://openalex.org/W6799372109","https://openalex.org/W6801617135","https://openalex.org/W6802386650","https://openalex.org/W6804126242","https://openalex.org/W6810028700","https://openalex.org/W6810730852","https://openalex.org/W6811440883","https://openalex.org/W6837789219","https://openalex.org/W6853318610","https://openalex.org/W6853621311","https://openalex.org/W6854054124","https://openalex.org/W6854186793","https://openalex.org/W6854308872","https://openalex.org/W6854866820","https://openalex.org/W6855916431","https://openalex.org/W6856685690","https://openalex.org/W6857690716","https://openalex.org/W6859201984","https://openalex.org/W6859392132","https://openalex.org/W6882949532"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"The":[0,86,132,178],"ability":[1],"of":[2,78,95,123,129,146,153,187,196],"processing":[3,53],"long":[4],"contexts":[5],"is":[6,211],"crucial":[7],"for":[8],"large":[9],"language":[10],"models":[11],"(LLMs),":[12],"but":[13],"training":[14],"LLMs":[15,80],"with":[16,42,144,170,205,213,228,255],"a":[17,38,63,89,96,102,121],"long-context":[18,52,242,253,270],"window":[19],"requires":[20],"substantial":[21],"computational":[22,176,260],"resources.":[23],"Many":[24],"sought":[25],"to":[26,50,141,166,182,191,199],"mitigate":[27],"this":[28,58,60,157],"through":[29],"the":[30,75,127,151,168,193],"sparse":[31,35,65,154,159],"attention":[32,36,44,91,104,160,172],"mechanism.":[33,106],"However,":[34],"faces":[37],"noticeable":[39],"gap":[40,169],"compared":[41],"full":[43,171],"in":[45],"capturing":[46],"long-distance":[47,97,118,164],"information,":[48],"leading":[49],"limited":[51],"capabilities.":[54],"To":[55],"effectively":[56],"address":[57],"issue,":[59],"article":[61],"proposes":[62],"novel":[64],"transformer":[66,70],"architecture":[67],"called":[68],"2-D":[69,90],"(2D-former),":[71],"aimed":[72],"at":[73],"extending":[74],"context":[76,194],"windows":[77],"pretrained":[79],"while":[81,173,263],"reducing":[82,175],"GPU":[83,257],"memory":[84,258],"requirements.":[85,177],"2D-former":[87,179,229,250],"incorporates":[88],"mechanism":[92,134],"that":[93,249],"consists":[94],"information":[98,119,165],"compressor":[99],"(LDIC)":[100],"and":[101,116,218,259,271],"blockwise":[103,111],"(BA)":[105],"LDIC":[107],"can":[108,161],"self-adaptively":[109],"extract":[110],"representational":[112],"features":[113],"by":[114],"convolution":[115],"compress":[117],"into":[120],"set":[122],"tokens":[124,149],"based":[125],"on":[126,201],"significance":[128],"each":[130,139],"block.":[131],"BA":[133],"integrates":[135],"these":[136],"features,":[137],"enabling":[138],"token":[140],"directly":[142],"communicate":[143],"any":[145],"its":[147],"preceding":[148],"during":[150],"computation":[152],"attention.":[155],"In":[156,208],"way,":[158],"fully":[162],"utilize":[163],"bridge":[167],"greatly":[174],"only":[180],"needs":[181],"add":[183],"less":[184],"than":[185],"0.14%":[186],"additional":[188],"trainable":[189],"parameters":[190],"extend":[192],"length":[195],"LLaMA2":[197],"7B":[198],"32k":[200],"4":[202],"A100":[203],"GPUs":[204],"40-GB":[206],"memory.":[207],"addition,":[209],"it":[210],"compatible":[212],"most":[214],"current":[215],"acceleration":[216],"techniques":[217],"parameter-efficient":[219],"fine-tuning":[220,227,234],"(PEFT)":[221],"methods.":[222],"Furthermore,":[223],"we":[224],"conduct":[225],"supervised":[226],"using":[230],"our":[231],"self-collected":[232],"long-instruction":[233],"dataset,":[235],"named":[236],"LongTuning,":[237],"which":[238],"comprises":[239],"over":[240],"11k":[241],"question-answer":[243],"(QA)":[244],"pairs.":[245],"Experimental":[246],"results":[247],"demonstrate":[248],"achieves":[251],"efficient":[252],"extension":[254],"minimal":[256],"time":[261],"consumption,":[262],"maintaining":[264],"superior":[265],"performance":[266],"across":[267],"both":[268],"downstream":[269],"short-context":[272],"tasks.":[273]},"counts_by_year":[{"year":2025,"cited_by_count":3}],"updated_date":"2025-12-21T01:58:51.020947","created_date":"2025-10-10T00:00:00"}
