{"id":"https://openalex.org/W4392254154","doi":"https://doi.org/10.1145/3639479.3639514","title":"GPT Rotational Position Embedding for Length Extrapolation","display_name":"GPT Rotational Position Embedding for Length Extrapolation","publication_year":2023,"publication_date":"2023-12-27","ids":{"openalex":"https://openalex.org/W4392254154","doi":"https://doi.org/10.1145/3639479.3639514"},"language":"en","primary_location":{"id":"doi:10.1145/3639479.3639514","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3639479.3639514","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2023 6th International Conference on Machine Learning and Natural Language Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5037199961","display_name":"Zhijie Qu","orcid":"https://orcid.org/0009-0003-8662-6468"},"institutions":[{"id":"https://openalex.org/I37796252","display_name":"Beijing University of Technology","ror":"https://ror.org/037b1pp87","country_code":"CN","type":"education","lineage":["https://openalex.org/I37796252"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zhijie Qu","raw_affiliation_strings":["Beijing University of Technology, China"],"affiliations":[{"raw_affiliation_string":"Beijing University of Technology, China","institution_ids":["https://openalex.org/I37796252"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5037199961"],"corresponding_institution_ids":["https://openalex.org/I37796252"],"apc_list":null,"apc_paid":null,"fwci":0.3497,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.6823011,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"166","last_page":"170"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9842000007629395,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.965399980545044,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/extrapolation","display_name":"Extrapolation","score":0.8251861929893494},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6349202990531921},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4934910535812378},{"id":"https://openalex.org/keywords/position","display_name":"Position (finance)","score":0.47228461503982544},{"id":"https://openalex.org/keywords/scaling","display_name":"Scaling","score":0.45756250619888306},{"id":"https://openalex.org/keywords/overfitting","display_name":"Overfitting","score":0.41326045989990234},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.41032516956329346},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.40934693813323975},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.2558767795562744},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.1685555875301361}],"concepts":[{"id":"https://openalex.org/C132459708","wikidata":"https://www.wikidata.org/wiki/Q744069","display_name":"Extrapolation","level":2,"score":0.8251861929893494},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6349202990531921},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4934910535812378},{"id":"https://openalex.org/C198082294","wikidata":"https://www.wikidata.org/wiki/Q3399648","display_name":"Position (finance)","level":2,"score":0.47228461503982544},{"id":"https://openalex.org/C99844830","wikidata":"https://www.wikidata.org/wiki/Q102441924","display_name":"Scaling","level":2,"score":0.45756250619888306},{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.41326045989990234},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.41032516956329346},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40934693813323975},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.2558767795562744},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.1685555875301361},{"id":"https://openalex.org/C10138342","wikidata":"https://www.wikidata.org/wiki/Q43015","display_name":"Finance","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3639479.3639514","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3639479.3639514","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2023 6th International Conference on Machine Learning and Natural Language Processing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":3,"referenced_works":["https://openalex.org/W4385570090","https://openalex.org/W4386185625","https://openalex.org/W4388979610"],"related_works":["https://openalex.org/W4362597605","https://openalex.org/W1574414179","https://openalex.org/W4297676672","https://openalex.org/W3009056573","https://openalex.org/W2922073769","https://openalex.org/W4281702477","https://openalex.org/W2490526372","https://openalex.org/W4376166922","https://openalex.org/W4378510483","https://openalex.org/W4221142204"],"abstract_inverted_index":{"Since":[0],"the":[1,4,8,21,25,34,46,68,73,87,102,108,115,120,124,130,139,161,174,187,192,198,201,207,212],"introduction":[2],"of":[3,48,95,163,176,200],"GPT":[5,140],"model":[6,10,22,116,141],"as":[7],"mainstream":[9],"for":[11,61,106],"dialog":[12,31,178],"generation,":[13,179],"a":[14,93,182],"hot":[15],"issue":[16],"is":[17,76,181,195],"how":[18],"to":[19,23,58,85,101,117,128,145,159,173,211],"enable":[20],"extend":[24],"prediction":[26,74],"length":[27,37,75,127,194,199],"and":[28,186],"generate":[29],"longer":[30,149],"texts":[32],"while":[33],"training":[35,202],"context":[36],"remains":[38],"constant.":[39],"Rotary":[40],"incorporates":[41],"absolute":[42,214],"position":[43,50,215],"information":[44],"in":[45,67,157,166],"form":[47],"relative":[49],"encoding,":[51],"an":[52],"approach":[53],"that":[54,138,147,191],"has":[55],"been":[56],"shown":[57],"be":[59],"advantageous":[60],"extrapolating":[62],"on":[63,123],"transformer":[64],"models.":[65],"However,":[66],"actual":[69],"extrapolation":[70,193,208],"process,":[71],"when":[72],"too":[77],"long,":[78],"its":[79,151],"perplexity":[80],"climbs":[81],"sharply.":[82],"In":[83,99,155],"order":[84,158],"solve":[86],"above":[88],"problems,":[89],"this":[90,112,134,164,169],"paper":[91,170],"proposes":[92],"method":[94,113,165],"scaling":[96],"frequency":[97],"basis.":[98],"contrast":[100],"previous":[103],"manual":[104],"test":[105],"selecting":[107],"optimal":[109,131],"parameter":[110],"\u0251,":[111],"allows":[114],"dynamically":[118],"train":[119],"parameters":[121],"based":[122],"input":[125],"sequence":[126],"obtain":[129],"\u0251.":[132],"Using":[133],"method,":[135],"we":[136],"demonstrate":[137,190],"can":[142],"effectively":[143],"extrapolate":[144],"contexts":[146],"are":[148],"than":[150],"original":[152,213],"pre-training":[153],"length.":[154],"addition,":[156],"verify":[160],"effectiveness":[162],"practical":[167],"applications,":[168],"applies":[171],"it":[172],"field":[175],"medical":[177],"which":[180,204],"more":[183],"complex":[184],"scenario,":[185],"experimental":[188],"results":[189],"several":[196],"times":[197],"context,":[203],"greatly":[205],"enhances":[206],"performance":[209],"compared":[210],"coding.":[216]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
