{"id":"https://openalex.org/W4392523542","doi":"https://doi.org/10.1109/taslp.2024.3374062","title":"BaSFormer: A Balanced Sparsity Regularized Attention Network for Transformer","display_name":"BaSFormer: A Balanced Sparsity Regularized Attention Network for Transformer","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4392523542","doi":"https://doi.org/10.1109/taslp.2024.3374062"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2024.3374062","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/taslp.2024.3374062","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5040208886","display_name":"Shuoran Jiang","orcid":"https://orcid.org/0000-0001-5888-0328"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]},{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shuoran Jiang","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China","Peng Cheng Laboratory, China","Harbin Institute of Technology (Shenzhen), China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]},{"raw_affiliation_string":"Peng Cheng Laboratory, China","institution_ids":["https://openalex.org/I4210136793"]},{"raw_affiliation_string":"Harbin Institute of Technology (Shenzhen), China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101895423","display_name":"Qingcai Chen","orcid":"https://orcid.org/0000-0001-8473-7293"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]},{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingcai Chen","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China","Peng Cheng Laboratory, China","Harbin Institute of Technology (Shenzhen), China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]},{"raw_affiliation_string":"Peng Cheng Laboratory, China","institution_ids":["https://openalex.org/I4210136793"]},{"raw_affiliation_string":"Harbin Institute of Technology (Shenzhen), China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101632572","display_name":"Yang Xiang","orcid":"https://orcid.org/0000-0003-1395-6805"},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Xiang","raw_affiliation_strings":["Peng Cheng Laboratory, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Peng Cheng Laboratory, Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004433495","display_name":"Youcheng Pan","orcid":"https://orcid.org/0000-0002-8270-5455"},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Youcheng Pan","raw_affiliation_strings":["Peng Cheng Laboratory, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Peng Cheng Laboratory, Shenzhen, China","institution_ids":["https://openalex.org/I4210136793"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5050877497","display_name":"Xiangping Wu","orcid":"https://orcid.org/0000-0002-5267-2250"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiangping Wu","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China","Harbin Institute of Technology (Shenzhen), China"],"affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]},{"raw_affiliation_string":"Harbin Institute of Technology (Shenzhen), China","institution_ids":["https://openalex.org/I204983213"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5040208886"],"corresponding_institution_ids":["https://openalex.org/I204983213","https://openalex.org/I4210136793"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.01856906,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"32","issue":null,"first_page":"2125","last_page":"2140"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12611","display_name":"Neural Networks and Reservoir Computing","score":0.9908000230789185,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10688","display_name":"Image and Signal Denoising Methods","score":0.9557999968528748,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5780309438705444},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.47931209206581116},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3289225697517395},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.16427907347679138},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1386929750442505},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.07246893644332886}],"concepts":[{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5780309438705444},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.47931209206581116},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3289225697517395},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.16427907347679138},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1386929750442505},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.07246893644332886}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2024.3374062","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/taslp.2024.3374062","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2753377255","display_name":null,"funder_award_id":"62006062","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G584715664","display_name":null,"funder_award_id":"20&ZD226","funder_id":"https://openalex.org/F4320335869","funder_display_name":"National Social Science Fund of China"},{"id":"https://openalex.org/G6233398042","display_name":null,"funder_award_id":"62106115","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6312846351","display_name":null,"funder_award_id":"62176076","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6430788580","display_name":null,"funder_award_id":"62276075","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8627319070","display_name":null,"funder_award_id":"2023M741843","funder_id":"https://openalex.org/F4320321543","funder_display_name":"China Postdoctoral Science Foundation"},{"id":"https://openalex.org/G888125545","display_name":null,"funder_award_id":"XMHT20190108009","funder_id":"https://openalex.org/F4320327511","funder_display_name":"Development and Reform Commission of Shenzhen Municipality"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321543","display_name":"China Postdoctoral Science Foundation","ror":"https://ror.org/0426zh255"},{"id":"https://openalex.org/F4320327511","display_name":"Development and Reform Commission of Shenzhen Municipality","ror":"https://ror.org/03jmg4515"},{"id":"https://openalex.org/F4320335869","display_name":"National Social Science Fund of China","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":109,"referenced_works":["https://openalex.org/W183625566","https://openalex.org/W1566289585","https://openalex.org/W2257408573","https://openalex.org/W2760656271","https://openalex.org/W2896457183","https://openalex.org/W2916548775","https://openalex.org/W2923664576","https://openalex.org/W2941435824","https://openalex.org/W2950858167","https://openalex.org/W2951568144","https://openalex.org/W2962784628","https://openalex.org/W2962822108","https://openalex.org/W2962915948","https://openalex.org/W2963062480","https://openalex.org/W2963434219","https://openalex.org/W2963744496","https://openalex.org/W2963846996","https://openalex.org/W2963961878","https://openalex.org/W2964247056","https://openalex.org/W2964302946","https://openalex.org/W2965373594","https://openalex.org/W2970019270","https://openalex.org/W2970038129","https://openalex.org/W2970379526","https://openalex.org/W2970641574","https://openalex.org/W2970832665","https://openalex.org/W2972324944","https://openalex.org/W2984256198","https://openalex.org/W2988975212","https://openalex.org/W2996428491","https://openalex.org/W3015468748","https://openalex.org/W3034773362","https://openalex.org/W3034831508","https://openalex.org/W3035289598","https://openalex.org/W3093871477","https://openalex.org/W3094045953","https://openalex.org/W3100753857","https://openalex.org/W3104169042","https://openalex.org/W3109205936","https://openalex.org/W3118781290","https://openalex.org/W3125498921","https://openalex.org/W3152911627","https://openalex.org/W3154229486","https://openalex.org/W3154456300","https://openalex.org/W3167739156","https://openalex.org/W3173787059","https://openalex.org/W3175352680","https://openalex.org/W3175665465","https://openalex.org/W3175746962","https://openalex.org/W3176112445","https://openalex.org/W3180244179","https://openalex.org/W3204601267","https://openalex.org/W3217672792","https://openalex.org/W4213019189","https://openalex.org/W4220902914","https://openalex.org/W4224267386","https://openalex.org/W4230920021","https://openalex.org/W4238591275","https://openalex.org/W4281492411","https://openalex.org/W4281782777","https://openalex.org/W4282569005","https://openalex.org/W4285113702","https://openalex.org/W4285200900","https://openalex.org/W4287890648","https://openalex.org/W4288089072","https://openalex.org/W4288089799","https://openalex.org/W4293704625","https://openalex.org/W4300862723","https://openalex.org/W4306955484","https://openalex.org/W4322718191","https://openalex.org/W4324370640","https://openalex.org/W4362515116","https://openalex.org/W4362707064","https://openalex.org/W4372347502","https://openalex.org/W4385245566","https://openalex.org/W4385572918","https://openalex.org/W4385574074","https://openalex.org/W4387478897","https://openalex.org/W4389519823","https://openalex.org/W4392454577","https://openalex.org/W6685160515","https://openalex.org/W6691459498","https://openalex.org/W6691766336","https://openalex.org/W6738577479","https://openalex.org/W6762471145","https://openalex.org/W6762521896","https://openalex.org/W6765039553","https://openalex.org/W6766279884","https://openalex.org/W6766673545","https://openalex.org/W6767737316","https://openalex.org/W6768021236","https://openalex.org/W6769386472","https://openalex.org/W6769627184","https://openalex.org/W6769692749","https://openalex.org/W6776048684","https://openalex.org/W6781533629","https://openalex.org/W6783433986","https://openalex.org/W6784447870","https://openalex.org/W6788175385","https://openalex.org/W6793627087","https://openalex.org/W6794146050","https://openalex.org/W6797610904","https://openalex.org/W6804272589","https://openalex.org/W6805143542","https://openalex.org/W6811242705","https://openalex.org/W6845463252","https://openalex.org/W6850625674","https://openalex.org/W6851775633","https://openalex.org/W6851960618"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W4402327032","https://openalex.org/W2382290278"],"abstract_inverted_index":{"Attention":[0,44],"networks":[1],"often":[2],"make":[3],"decisions":[4],"relying":[5],"solely":[6],"on":[7,112,126],"a":[8,83,105],"few":[9],"pieces":[10],"of":[11,21,27,75,87,114,174,181,188,226,231],"tokens,":[12],"even":[13],"if":[14],"those":[15],"reliances":[16],"are":[17],"not":[18],"truly":[19],"indicative":[20],"the":[22,28,73,85,93,115,122,135,172,178,186,202,224,232,237],"underlying":[23],"meaning":[24],"or":[25],"intention":[26],"full":[29],"context.":[30],"This":[31],"can":[32,198,261],"lead":[33],"to":[34,42,52,96,146,167,229],"over-fitting":[35],"in":[36,134,177,250,266],"transformers":[37],"and":[38,46,151,184,240,272],"hinder":[39],"their":[40],"ability":[41],"generalize.":[43],"regularization":[45,120],"sparsity-based":[47],"methods":[48,58],"have":[49,64,162],"been":[50],"used":[51,200],"overcome":[53],"this":[54,255],"issue.":[55],"However,":[56],"these":[57,88,101],"cannot":[59,78],"guarantee":[60],"that":[61,159,192,221,230,259],"all":[62,160],"tokens":[63,161,176],"sufficient":[65],"receptive":[66,165],"fields":[67],"for":[68,201],"global":[69,175],"information":[70],"inference.":[71],"Thus,":[72],"impact":[74,187],"individual":[76,189],"biases":[77],"be":[79,199],"effectively":[80],"reduced.":[81],"As":[82],"result,":[84],"generalization":[86],"approaches":[89],"improved":[90,223],"slightly":[91],"from":[92],"training":[94],"data":[95],"new":[97,246],"data.":[98],"To":[99],"address":[100],"limitations,":[102],"we":[103,206],"propose":[104,207],"balanced":[106],"sparsity":[107],"(BaS)":[108],"regularized":[109],"attention":[110,136,144],"network":[111],"top":[113],"transformers,":[116],"called":[117],"BaSFormer.":[118],"BaS":[119],"introduces":[121],"K-regular":[123,155,203],"graph":[124,156,204],"constraint":[125,157],"self-attention":[127],"connections,":[128,148],"which":[129,170,269],"replaces":[130],"SoftMax":[131],"with":[132,212],"SparseMax":[133,141],"transformation.":[137],"In":[138,242],"BaS-regularized":[139],"self-attention,":[140],"assigns":[142],"zero":[143],"scores":[145],"low-scoring":[147],"highlighting":[149],"influential":[150],"meaningful":[152],"contexts.":[153],"The":[154,217],"ensures":[158],"an":[163,208,213],"equal-sized":[164],"field":[166],"aggregate":[168],"information,":[169],"facilitates":[171],"involvement":[173],"feature":[179],"update":[180],"each":[182],"layer":[183],"reduces":[185],"biases.":[190],"Given":[191],"there":[193],"is":[194],"no":[195],"continuous":[196],"loss":[197,211],"regularization,":[205],"exponential":[209],"extremum":[210],"augmented":[214],"Lagrangian":[215],"function.":[216],"experimental":[218],"results":[219,249],"showed":[220],"BaSFormer":[222,244,260],"effectiveness":[225],"debiasing":[227],"compared":[228],"newest":[233],"LLMs,":[234],"such":[235],"as":[236],"GPT-3.5,":[238],"GPT-4":[239],"LLaMA.":[241],"addition,":[243],"achieves":[245],"state-of-the-art":[247],"(SOTA)":[248],"text":[251],"generation":[252],"tasks.":[253],"Interestingly,":[254],"work":[256],"also":[257],"shows":[258],"learn":[262],"hierarchical":[263],"linguistic":[264],"dependencies":[265],"gradient":[267],"attributions,":[268],"improves":[270],"interpretability":[271],"adversarial":[273],"robustness.":[274]},"counts_by_year":[],"updated_date":"2026-03-27T14:29:43.386196","created_date":"2025-10-10T00:00:00"}
