{"id":"https://openalex.org/W4386729798","doi":"https://doi.org/10.1145/3604915.3610644","title":"Turning Dross Into Gold Loss: is BERT4Rec really better than SASRec?","display_name":"Turning Dross Into Gold Loss: is BERT4Rec really better than SASRec?","publication_year":2023,"publication_date":"2023-09-14","ids":{"openalex":"https://openalex.org/W4386729798","doi":"https://doi.org/10.1145/3604915.3610644"},"language":"en","primary_location":{"id":"doi:10.1145/3604915.3610644","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3604915.3610644","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 17th ACM Conference on Recommender Systems","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2309.07602","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5092867520","display_name":"Anton Klenitskiy","orcid":"https://orcid.org/0009-0005-8961-6921"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Anton Klenitskiy","raw_affiliation_strings":["Sber, AI Lab, Russian Federation"],"raw_orcid":"https://orcid.org/0009-0005-8961-6921","affiliations":[{"raw_affiliation_string":"Sber, AI Lab, Russian Federation","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5012286441","display_name":"Alexey Vasilev","orcid":"https://orcid.org/0009-0007-1415-2004"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alexey Vasilev","raw_affiliation_strings":["Sber, AI Lab, Russian Federation"],"raw_orcid":"https://orcid.org/0009-0007-1415-2004","affiliations":[{"raw_affiliation_string":"Sber, AI Lab, Russian Federation","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5092867520"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":21.0757,"has_fulltext":true,"cited_by_count":47,"citation_normalized_percentile":{"value":0.9940992,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1120","last_page":"1125"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/softmax-function","display_name":"Softmax function","score":0.8024133443832397},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7641885280609131},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5418491959571838},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.5270607471466064},{"id":"https://openalex.org/keywords/binary-classification","display_name":"Binary classification","score":0.51859450340271},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5108277797698975},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.50677090883255},{"id":"https://openalex.org/keywords/binary-number","display_name":"Binary number","score":0.45242252945899963},{"id":"https://openalex.org/keywords/principle-of-maximum-entropy","display_name":"Principle of maximum entropy","score":0.4209924340248108},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.32376188039779663},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.21364560723304749},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.13468563556671143},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10746657848358154},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08492967486381531},{"id":"https://openalex.org/keywords/arithmetic","display_name":"Arithmetic","score":0.0748700201511383}],"concepts":[{"id":"https://openalex.org/C188441871","wikidata":"https://www.wikidata.org/wiki/Q7554146","display_name":"Softmax function","level":3,"score":0.8024133443832397},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7641885280609131},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5418491959571838},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.5270607471466064},{"id":"https://openalex.org/C66905080","wikidata":"https://www.wikidata.org/wiki/Q17005494","display_name":"Binary classification","level":3,"score":0.51859450340271},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5108277797698975},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.50677090883255},{"id":"https://openalex.org/C48372109","wikidata":"https://www.wikidata.org/wiki/Q3913","display_name":"Binary number","level":2,"score":0.45242252945899963},{"id":"https://openalex.org/C9679016","wikidata":"https://www.wikidata.org/wiki/Q1417473","display_name":"Principle of maximum entropy","level":2,"score":0.4209924340248108},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.32376188039779663},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.21364560723304749},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.13468563556671143},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10746657848358154},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08492967486381531},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.0748700201511383},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3604915.3610644","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3604915.3610644","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 17th ACM Conference on Recommender Systems","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2309.07602","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2309.07602","pdf_url":"https://arxiv.org/pdf/2309.07602","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2309.07602","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2309.07602","pdf_url":"https://arxiv.org/pdf/2309.07602","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/8","display_name":"Decent work and economic growth","score":0.46000000834465027}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4386729798.pdf"},"referenced_works_count":31,"referenced_works":["https://openalex.org/W2027731328","https://openalex.org/W2171279286","https://openalex.org/W2626454364","https://openalex.org/W2734755249","https://openalex.org/W2741544350","https://openalex.org/W2783272285","https://openalex.org/W2937556626","https://openalex.org/W2962897394","https://openalex.org/W2963367478","https://openalex.org/W2964296635","https://openalex.org/W2980282514","https://openalex.org/W2984100107","https://openalex.org/W2996931760","https://openalex.org/W3081170586","https://openalex.org/W3089147479","https://openalex.org/W3102619277","https://openalex.org/W3133849783","https://openalex.org/W3135396887","https://openalex.org/W3156844209","https://openalex.org/W3185773347","https://openalex.org/W3194671304","https://openalex.org/W3195554589","https://openalex.org/W3201149665","https://openalex.org/W3206127589","https://openalex.org/W4296591831","https://openalex.org/W4296591843","https://openalex.org/W4296604567","https://openalex.org/W4298197157","https://openalex.org/W4299286960","https://openalex.org/W4306317705","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W3107204728","https://openalex.org/W4287591324","https://openalex.org/W3108503355","https://openalex.org/W2980176872","https://openalex.org/W4226420367","https://openalex.org/W2962876041","https://openalex.org/W2988321605","https://openalex.org/W2830281438","https://openalex.org/W3041490575","https://openalex.org/W2970690932"],"abstract_inverted_index":{"Recently":[0],"sequential":[1],"recommendations":[2],"and":[3,25,43,73,81,116,132],"next-item":[4],"prediction":[5],"task":[6],"has":[7],"become":[8],"increasingly":[9],"popular":[10],"in":[11,112],"the":[12,28,51,97,137],"field":[13],"of":[14,50,114,139],"recommender":[15],"systems.":[16],"Currently,":[17],"two":[18,41],"state-of-the-art":[19,46],"baselines":[20],"are":[21,94],"Transformer-based":[22],"models":[23,93],"SASRec":[24,69,106,124],"BERT4Rec.":[26],"Over":[27],"past":[29],"few":[30,37],"years,":[31],"there":[32],"have":[33],"been":[34],"quite":[35],"a":[36],"publications":[38],"comparing":[39],"these":[40],"algorithms":[42],"proposing":[44],"new":[45],"models.":[47],"In":[48,85,119],"most":[49],"publications,":[52],"BERT4Rec":[53,60,110],"achieves":[54],"better":[55],"performance":[56],"than":[57,146],"SASRec.":[58],"But":[59],"uses":[61,70],"cross-entropy":[62,76],"over":[63],"softmax":[64],"for":[65,78],"all":[66],"items,":[67],"while":[68],"negative":[71,83,130,140],"sampling":[72,131],"calculates":[74],"binary":[75],"loss":[77],"one":[79,82],"positive":[80],"item.":[84],"our":[86],"work,":[87],"we":[88,121],"show":[89,122],"that":[90,123],"if":[91],"both":[92,111],"trained":[95,128],"with":[96,129],"same":[98],"loss,":[99],"which":[100],"is":[101],"used":[102],"by":[103],"BERT4Rec,":[104,135],"then":[105],"will":[107],"significantly":[108],"outperform":[109,134],"terms":[113],"quality":[115],"training":[117],"speed.":[118],"addition,":[120],"could":[125],"be":[126,143],"effectively":[127],"still":[133],"but":[136],"number":[138],"examples":[141],"should":[142],"much":[144],"larger":[145],"one.":[147]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":25},{"year":2024,"cited_by_count":18}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
