{"id":"https://openalex.org/W4412944907","doi":"https://doi.org/10.18653/v1/2025.acl-long.947","title":"Segment-Based Attention Masking for GPTs","display_name":"Segment-Based Attention Masking for GPTs","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4412944907","doi":"https://doi.org/10.18653/v1/2025.acl-long.947"},"language":"en","primary_location":{"id":"doi:10.18653/v1/2025.acl-long.947","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.acl-long.947","pdf_url":"https://aclanthology.org/2025.acl-long.947.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.acl-long.947.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5115653631","display_name":"Shahar Katz","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shahar Katz","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093854356","display_name":"Liran Ringel","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liran Ringel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114803503","display_name":"Yaniv Romano","orcid":"https://orcid.org/0000-0001-8951-8583"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yaniv Romano","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5076279043","display_name":"Lior Wolf","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lior Wolf","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.08856397,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"19308","last_page":"19322"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11902","display_name":"Intelligent Tutoring Systems and Adaptive Learning","score":0.919700026512146,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11902","display_name":"Intelligent Tutoring Systems and Adaptive Learning","score":0.919700026512146,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/masking","display_name":"Masking (illustration)","score":0.8849647045135498},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5364829301834106},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.3428648114204407},{"id":"https://openalex.org/keywords/art","display_name":"Art","score":0.08929824829101562},{"id":"https://openalex.org/keywords/visual-arts","display_name":"Visual arts","score":0.03630852699279785}],"concepts":[{"id":"https://openalex.org/C2777402240","wikidata":"https://www.wikidata.org/wiki/Q6783436","display_name":"Masking (illustration)","level":2,"score":0.8849647045135498},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5364829301834106},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.3428648114204407},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.08929824829101562},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.03630852699279785}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.acl-long.947","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.acl-long.947","pdf_url":"https://aclanthology.org/2025.acl-long.947.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.acl-long.947","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.acl-long.947","pdf_url":"https://aclanthology.org/2025.acl-long.947.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G6367023377","display_name":null,"funder_award_id":"MJFF-022407","funder_id":"https://openalex.org/F4320306136","funder_display_name":"Michael J. Fox Foundation for Parkinson's Research"},{"id":"https://openalex.org/G7308213304","display_name":"Uniting Statistical Testing and Machine Learning for Safe Predictions","funder_award_id":"101163414","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"}],"funders":[{"id":"https://openalex.org/F4320306136","display_name":"Michael J. Fox Foundation for Parkinson's Research","ror":"https://ror.org/03arq3225"},{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"},{"id":"https://openalex.org/F4320322596","display_name":"Tel Aviv University","ror":"https://ror.org/04mhzgx49"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4412944907.pdf","grobid_xml":"https://content.openalex.org/works/W4412944907.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W4404995717","https://openalex.org/W2016187641","https://openalex.org/W4404725684","https://openalex.org/W4246450666","https://openalex.org/W4388998267","https://openalex.org/W4409278740","https://openalex.org/W2898370298"],"abstract_inverted_index":{"Causal":[0],"masking":[1,29],"is":[2,30,70,100,115,145],"a":[3,13,93,118,139,161],"fundamental":[4],"component":[5],"in":[6,92,130,138,147],"Generative":[7],"Pre-Trained":[8],"Transformer":[9],"(GPT)":[10],"models,":[11],"playing":[12],"crucial":[14],"role":[15],"during":[16,45],"training.Although":[17],"GPTs":[18],"can":[19,133],"process":[20,88],"the":[21,27,38,46,51,54,59,74,79,84,97,106,110,121,127,135,142,148],"entire":[22],"user":[23,107],"prompt":[24,56,99,108],"at":[25,78],"once,":[26],"causal":[28,150],"applied":[31],"to":[32],"all":[33],"input":[34,55],"tokens":[35,129,137],"step-by-step,":[36],"mimicking":[37],"generation":[39],"process.This":[40],"imposes":[41],"an":[42],"unnecessary":[43],"constraint":[44],"initial":[47],"\"prefill\"":[48],"phase":[49],"when":[50],"model":[52,143],"processes":[53],"and":[57,105,171],"generates":[58],"internal":[60],"representations":[61],"before":[62],"producing":[63],"any":[64],"output":[65],"tokens.In":[66],"this":[67],"work,":[68],"attention":[69],"masked":[71],"based":[72],"on":[73],"known":[75],"block":[76,132],"structure":[77],"prefill":[80],"phase,":[81],"followed":[82],"by":[83],"conventional":[85,149],"token-by-token":[86],"autoregressive":[87],"after":[89],"that.For":[90],"example,":[91],"typical":[94],"chat":[95],"prompt,":[96],"system":[98],"treated":[101,116],"as":[102,109,117,169],"one":[103],"block,":[104],"next":[111],"one.Each":[112],"of":[113,123],"these":[114],"unit":[119],"for":[120],"purpose":[122],"masking,":[124],"such":[125,168],"that":[126],"first":[128],"each":[131],"access":[134],"subsequent":[136],"non-causal":[140],"manner.Then,":[141],"answer":[144],"generated":[146],"manner.The":[151],"Segment-by-Segment":[152],"scheme":[153],"entails":[154],"no":[155],"additional":[156],"computational":[157],"overhead.When":[158],"integrated":[159],"using":[160],"lightweight":[162],"fine-tuning":[163],"into":[164],"already":[165],"trained":[166],"models":[167],"Llama":[170],"Qwen,":[172],"MAS":[173],"quickly":[174],"increases":[175],"models'":[176],"performances.Our":[177],"code":[178],"will":[179],"be":[180],"available":[181],"at:":[182],"https://github.com/shacharKZ/":[183],"MAS-Segment":[184]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
