{"id":"https://openalex.org/W4417453663","doi":"https://doi.org/10.48550/arxiv.2512.13109","title":"Uncovering the Role of Initial Saliency in U-Shaped Attention Bias: Scaling Initial Token Weight for Enhanced Long-Text Processing","display_name":"Uncovering the Role of Initial Saliency in U-Shaped Attention Bias: Scaling Initial Token Weight for Enhanced Long-Text Processing","publication_year":2025,"publication_date":"2025-12-15","ids":{"openalex":"https://openalex.org/W4417453663","doi":"https://doi.org/10.48550/arxiv.2512.13109"},"language":null,"primary_location":{"id":"pmh:oai:arXiv.org:2512.13109","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2512.13109","pdf_url":"https://arxiv.org/pdf/2512.13109","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2512.13109","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5120842121","display_name":"Zewen Qiang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Qiang, Zewen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067025788","display_name":"Sendong Zhao","orcid":"https://orcid.org/0000-0002-4676-1812"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Sendong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075514969","display_name":"Haochun Wang","orcid":"https://orcid.org/0000-0003-2908-9750"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Haochun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048691999","display_name":"Bing Qin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qin, Bing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100320723","display_name":"Zhiyuan Liu","orcid":"https://orcid.org/0000-0002-7709-2543"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Ting","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5120842121"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.3709999918937683,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.3709999918937683,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.1867000013589859,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.07270000129938126,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.8636000156402588},{"id":"https://openalex.org/keywords/property","display_name":"Property (philosophy)","score":0.5965999960899353},{"id":"https://openalex.org/keywords/position","display_name":"Position (finance)","score":0.552299976348877},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5508000254631042},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.5396999716758728},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.5188999772071838}],"concepts":[{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.8636000156402588},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6991999745368958},{"id":"https://openalex.org/C189950617","wikidata":"https://www.wikidata.org/wiki/Q937228","display_name":"Property (philosophy)","level":2,"score":0.5965999960899353},{"id":"https://openalex.org/C198082294","wikidata":"https://www.wikidata.org/wiki/Q3399648","display_name":"Position (finance)","level":2,"score":0.552299976348877},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5508000254631042},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.5396999716758728},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.5188999772071838},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5166000127792358},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.4814999997615814},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.4325000047683716},{"id":"https://openalex.org/C2993807640","wikidata":"https://www.wikidata.org/wiki/Q103709453","display_name":"Attention network","level":2,"score":0.30230000615119934},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.29109999537467957},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.28220000863075256},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.27140000462532043},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.2705000042915344},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.25850000977516174}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2512.13109","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2512.13109","pdf_url":"https://arxiv.org/pdf/2512.13109","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2512.13109","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.13109","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2512.13109","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2512.13109","pdf_url":"https://arxiv.org/pdf/2512.13109","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4417453663.pdf","grobid_xml":"https://content.openalex.org/works/W4417453663.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"language":[1,13],"models":[2],"(LLMs)":[3],"have":[4,65],"demonstrated":[5],"strong":[6],"performance":[7],"on":[8,49],"a":[9,40,55,139,163],"variety":[10],"of":[11,54,109,142,166],"natural":[12],"processing":[14],"(NLP)":[15],"tasks.":[16,170],"However,":[17],"they":[18],"often":[19],"struggle":[20],"with":[21,92,151],"long-text":[22],"sequences":[23],"due":[24],"to":[25,37,69,97,102,134,154],"the":[26,29,50,58,85,98,107,110,125,131],"``lost":[27],"in":[28,84,106,144,168],"middle''":[30],"phenomenon.":[31],"This":[32],"issue":[33],"has":[34],"been":[35],"shown":[36],"arise":[38],"from":[39],"U-shaped":[41],"attention":[42,45,86,94,105,122],"bias,":[43],"where":[44],"is":[46],"disproportionately":[47],"focused":[48],"beginning":[51],"and":[52,128],"end":[53],"text,":[56],"leaving":[57],"middle":[59],"section":[60],"underrepresented.":[61],"While":[62],"previous":[63],"studies":[64],"attributed":[66],"this":[67,118,149],"bias":[68,158],"position":[70,156],"encoding,":[71],"our":[72],"research":[73],"first":[74],"identifies":[75],"an":[76],"additional":[77],"factor:":[78],"initial":[79,99,126],"saliency.":[80],"It":[81],"means":[82],"that":[83,116],"computation":[87],"for":[88],"each":[89],"token,":[90],"tokens":[91],"higher":[93],"weights":[95],"relative":[96],"token":[100,127],"tend":[101],"receive":[103],"more":[104],"prediction":[108],"next":[111],"token.":[112],"We":[113],"further":[114,159],"find":[115],"utilizing":[117],"property":[119],"by":[120],"scaling":[121],"weight":[123],"between":[124],"others":[129],"improves":[130],"model's":[132],"ability":[133],"process":[135],"long":[136],"contexts,":[137],"achieving":[138,162],"maximum":[140,164],"improvement":[141,165],"3.6\\%":[143],"MDQA":[145],"dataset.":[146],"Moreover,":[147],"combining":[148],"approach":[150],"existing":[152],"methods":[153],"reduce":[155],"encoding":[157],"enhances":[160],"performance,":[161],"3.4\\%":[167],"KV-Retrieval":[169]},"counts_by_year":[],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-12-17T00:00:00"}
