{"id":"https://openalex.org/W4412888706","doi":"https://doi.org/10.18653/v1/2025.findings-acl.242","title":"DAM: Dynamic Attention Mask for Long-Context Large Language Model Inference Acceleration","display_name":"DAM: Dynamic Attention Mask for Long-Context Large Language Model Inference Acceleration","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4412888706","doi":"https://doi.org/10.18653/v1/2025.findings-acl.242"},"language":"en","primary_location":{"id":"doi:10.18653/v1/2025.findings-acl.242","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-acl.242","pdf_url":"https://aclanthology.org/2025.findings-acl.242.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: ACL 2025","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.findings-acl.242.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5015298912","display_name":"Hanzhi Zhang","orcid":"https://orcid.org/0000-0002-9693-4005"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hanzhi Zhang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047220188","display_name":"Heng Fan","orcid":"https://orcid.org/0000-0002-7033-3690"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Heng Fan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061053956","display_name":"Kewei Sha","orcid":"https://orcid.org/0000-0002-3750-163X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kewei Sha","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022374886","display_name":"Yan Huang","orcid":"https://orcid.org/0000-0001-7775-4597"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yan Huang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5073748933","display_name":"Yunhe Feng","orcid":"https://orcid.org/0000-0001-6577-227X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yunhe Feng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.08624524,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"4663","last_page":"4676"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.996999979019165,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9857000112533569,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9686999917030334,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/acceleration","display_name":"Acceleration","score":0.7602972984313965},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.7508795261383057},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7352579236030579},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6388165354728699},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.45663052797317505},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4354970455169678},{"id":"https://openalex.org/keywords/context-model","display_name":"Context model","score":0.42095476388931274},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.38601842522621155},{"id":"https://openalex.org/keywords/history","display_name":"History","score":0.12624123692512512},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.050646066665649414}],"concepts":[{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.7602972984313965},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.7508795261383057},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7352579236030579},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6388165354728699},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.45663052797317505},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4354970455169678},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.42095476388931274},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.38601842522621155},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.12624123692512512},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.050646066665649414},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.0},{"id":"https://openalex.org/C74650414","wikidata":"https://www.wikidata.org/wiki/Q11397","display_name":"Classical mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.findings-acl.242","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-acl.242","pdf_url":"https://aclanthology.org/2025.findings-acl.242.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: ACL 2025","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.findings-acl.242","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-acl.242","pdf_url":"https://aclanthology.org/2025.findings-acl.242.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: ACL 2025","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4412888706.pdf","grobid_xml":"https://content.openalex.org/works/W4412888706.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2565094479","https://openalex.org/W2390829436","https://openalex.org/W1989791859","https://openalex.org/W602859758","https://openalex.org/W1971289376","https://openalex.org/W2379101322","https://openalex.org/W2055243143","https://openalex.org/W1992553864","https://openalex.org/W2776207444","https://openalex.org/W4385890381"],"abstract_inverted_index":{"Long-context":[0],"understanding":[1],"is":[2,132],"crucial":[3],"for":[4,79],"many":[5],"NLP":[6],"applications,":[7],"yet":[8],"transformers":[9],"struggle":[10],"with":[11,97],"efficiency":[12],"due":[13],"to":[14,32,115],"the":[15,62,77,119],"quadratic":[16],"complexity":[17],"of":[18,122],"self-attention.Sparse":[19],"attention":[20,35,55,91],"methods":[21],"alleviate":[22],"this":[23],"cost":[24],"but":[25],"often":[26],"impose":[27],"static,":[28],"predefined":[29,82],"masks,":[30],"failing":[31],"capture":[33],"heterogeneous":[34,66],"patterns.This":[36],"results":[37],"in":[38,47],"suboptimal":[39],"token":[40],"interactions,":[41],"limiting":[42],"adaptability":[43],"and":[44,70,81,107],"retrieval":[45,130],"accuracy":[46],"long-sequence":[48],"tasks.This":[49],"work":[50],"introduces":[51],"a":[52,112],"dynamic":[53],"sparse":[54],"mechanism":[56],"that":[57],"assigns":[58],"adaptive":[59],"masks":[60],"at":[61],"attention-map":[63],"level,":[64],"preserving":[65],"patterns":[67],"across":[68],"layers":[69],"heads.Unlike":[71],"existing":[72],"approaches,":[73],"our":[74],"method":[75],"eliminates":[76],"need":[78],"fine-tuning":[80],"mask":[83],"structures":[84],"while":[85,104],"maintaining":[86],"computational":[87],"efficiency.By":[88],"learning":[89],"context-aware":[90],"structures,":[92],"it":[93],"achieves":[94],"high":[95],"alignment":[96],"full-attention":[98],"models,":[99],"ensuring":[100],"minimal":[101],"performance":[102],"degradation":[103],"reducing":[105],"memory":[106],"compute":[108],"overhead.This":[109],"approach":[110],"provides":[111],"scalable":[113],"alternative":[114],"full":[116],"attention,":[117],"enabling":[118],"practical":[120],"deployment":[121],"largescale":[123],"Large":[124],"Language":[125],"Models":[126],"(LLMs)":[127],"without":[128],"sacrificing":[129],"performance.DAM":[131],"available":[133],"at:":[134],"https://github.com/":[135],"HanzhiZhang-Ulrica/DAM.":[136]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
