{"id":"https://openalex.org/W4298137819","doi":"https://doi.org/10.15439/2022f185","title":"Applying SoftTriple Loss for Supervised Language Model Fine Tuning","display_name":"Applying SoftTriple Loss for Supervised Language Model Fine Tuning","publication_year":2022,"publication_date":"2022-09-26","ids":{"openalex":"https://openalex.org/W4298137819","doi":"https://doi.org/10.15439/2022f185"},"language":"en","primary_location":{"id":"doi:10.15439/2022f185","is_oa":true,"landing_page_url":"https://doi.org/10.15439/2022f185","pdf_url":"https://annals-csis.org/proceedings/2022/drp/pdf/185.pdf","source":{"id":"https://openalex.org/S4220651875","display_name":"Annals of Computer Science and Information Systems","issn_l":"2300-5963","issn":["2300-5963"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":"https://openalex.org/P4310317484","host_organization_name":"Polskie Towarzystwo Informatyczne","host_organization_lineage":["https://openalex.org/P4310317484"],"host_organization_lineage_names":["Polskie Towarzystwo Informatyczne"],"type":"conference"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Annals of Computer Science and Information Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://annals-csis.org/proceedings/2022/drp/pdf/185.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5090197536","display_name":"Witold Sosnowski","orcid":"https://orcid.org/0000-0002-9219-584X"},"institutions":[{"id":"https://openalex.org/I108403487","display_name":"Warsaw University of Technology","ror":"https://ror.org/00y0xnp53","country_code":"PL","type":"education","lineage":["https://openalex.org/I108403487"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Witold Sosnowski","raw_affiliation_strings":["Faculty of Mathematics and Information Science Warsaw University of Technology Warsaw, Poland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Mathematics and Information Science Warsaw University of Technology Warsaw, Poland","institution_ids":["https://openalex.org/I108403487"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031984813","display_name":"Anna Wr\u00f3blewska","orcid":"https://orcid.org/0000-0002-3407-7570"},"institutions":[{"id":"https://openalex.org/I108403487","display_name":"Warsaw University of Technology","ror":"https://ror.org/00y0xnp53","country_code":"PL","type":"education","lineage":["https://openalex.org/I108403487"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Anna Wr\u00f3blewska","raw_affiliation_strings":["Faculty of Mathematics and Information Science Warsaw University of Technology Warsaw, Poland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Mathematics and Information Science Warsaw University of Technology Warsaw, Poland","institution_ids":["https://openalex.org/I108403487"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5023112001","display_name":"Piotr Gawrysiak","orcid":"https://orcid.org/0000-0002-9647-6761"},"institutions":[{"id":"https://openalex.org/I108403487","display_name":"Warsaw University of Technology","ror":"https://ror.org/00y0xnp53","country_code":"PL","type":"education","lineage":["https://openalex.org/I108403487"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Piotr Gawrysiak","raw_affiliation_strings":["Faculty of Electronics and Information Technology Warsaw University of Technology Warsaw, Poland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Electronics and Information Technology Warsaw University of Technology Warsaw, Poland","institution_ids":["https://openalex.org/I108403487"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.1038,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.3000265,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"30","issue":null,"first_page":"141","last_page":"147"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9898999929428101,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cross-entropy","display_name":"Cross entropy","score":0.7720797061920166},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6973761320114136},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.5655815601348877},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5603926777839661},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5096461772918701},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.4992988109588623},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4542345404624939},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.4509885907173157},{"id":"https://openalex.org/keywords/information-loss","display_name":"Information loss","score":0.44057127833366394},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.42165112495422363},{"id":"https://openalex.org/keywords/principle-of-maximum-entropy","display_name":"Principle of maximum entropy","score":0.4112602770328522},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3460020422935486}],"concepts":[{"id":"https://openalex.org/C167981619","wikidata":"https://www.wikidata.org/wiki/Q1685498","display_name":"Cross entropy","level":3,"score":0.7720797061920166},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6973761320114136},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.5655815601348877},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5603926777839661},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5096461772918701},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.4992988109588623},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4542345404624939},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.4509885907173157},{"id":"https://openalex.org/C2988416141","wikidata":"https://www.wikidata.org/wiki/Q6031139","display_name":"Information loss","level":2,"score":0.44057127833366394},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.42165112495422363},{"id":"https://openalex.org/C9679016","wikidata":"https://www.wikidata.org/wiki/Q1417473","display_name":"Principle of maximum entropy","level":2,"score":0.4112602770328522},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3460020422935486},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.15439/2022f185","is_oa":true,"landing_page_url":"https://doi.org/10.15439/2022f185","pdf_url":"https://annals-csis.org/proceedings/2022/drp/pdf/185.pdf","source":{"id":"https://openalex.org/S4220651875","display_name":"Annals of Computer Science and Information Systems","issn_l":"2300-5963","issn":["2300-5963"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":"https://openalex.org/P4310317484","host_organization_name":"Polskie Towarzystwo Informatyczne","host_organization_lineage":["https://openalex.org/P4310317484"],"host_organization_lineage_names":["Polskie Towarzystwo Informatyczne"],"type":"conference"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Annals of Computer Science and Information Systems","raw_type":"proceedings-article"},{"id":"pmh:oai:doaj.org/article:be0a069304d94069a30cab095f78a511","is_oa":false,"landing_page_url":"https://doaj.org/article/be0a069304d94069a30cab095f78a511","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Annals of computer science and information systems, Vol 30, Pp 141-147 (2022)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.15439/2022f185","is_oa":true,"landing_page_url":"https://doi.org/10.15439/2022f185","pdf_url":"https://annals-csis.org/proceedings/2022/drp/pdf/185.pdf","source":{"id":"https://openalex.org/S4220651875","display_name":"Annals of Computer Science and Information Systems","issn_l":"2300-5963","issn":["2300-5963"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":"https://openalex.org/P4310317484","host_organization_name":"Polskie Towarzystwo Informatyczne","host_organization_lineage":["https://openalex.org/P4310317484"],"host_organization_lineage_names":["Polskie Towarzystwo Informatyczne"],"type":"conference"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Annals of Computer Science and Information Systems","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.4399999976158142,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320322637","display_name":"Politechnika Warszawska","ror":"https://ror.org/00y0xnp53"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4298137819.pdf","grobid_xml":"https://content.openalex.org/works/W4298137819.grobid-xml"},"referenced_works_count":35,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1614298861","https://openalex.org/W2014902591","https://openalex.org/W2106053110","https://openalex.org/W2113459411","https://openalex.org/W2117154949","https://openalex.org/W2138621090","https://openalex.org/W2160660844","https://openalex.org/W2163455955","https://openalex.org/W2187131616","https://openalex.org/W2251939518","https://openalex.org/W2520774990","https://openalex.org/W2525778437","https://openalex.org/W2598634450","https://openalex.org/W2605102252","https://openalex.org/W2606092111","https://openalex.org/W2790235966","https://openalex.org/W2896457183","https://openalex.org/W2949380545","https://openalex.org/W2952186591","https://openalex.org/W2964121744","https://openalex.org/W2965373594","https://openalex.org/W2991234496","https://openalex.org/W3005680577","https://openalex.org/W3020900419","https://openalex.org/W3049002835","https://openalex.org/W3097571385","https://openalex.org/W3099206234","https://openalex.org/W3197822411","https://openalex.org/W4205352905","https://openalex.org/W4224920103","https://openalex.org/W4287614078","https://openalex.org/W4301386992","https://openalex.org/W6676984168","https://openalex.org/W6735531217"],"related_works":["https://openalex.org/W2047632477","https://openalex.org/W4287644835","https://openalex.org/W3092281475","https://openalex.org/W2951959408","https://openalex.org/W3098003361","https://openalex.org/W2895831313","https://openalex.org/W4289406078","https://openalex.org/W4385386361","https://openalex.org/W4386453465","https://openalex.org/W3194440789"],"abstract_inverted_index":{"We":[0],"introduce":[1],"a":[2,54],"new":[3],"loss":[4,26,38,51],"function":[5,27,52],"based":[6],"on":[7,46],"cross":[8],"entropy":[9],"and":[10,84],"SoftTriple":[11],"loss,":[12],"TripleEntropy,":[13],"to":[14],"improve":[15,29],"classification":[16],"performance":[17],"for":[18,67,76,80,85],"fine-tuning":[19],"general":[20],"knowledge":[21],"pre-trained":[22],"language":[23],"models.":[24],"This":[25],"can":[28],"the":[30,61,64],"robust":[31],"RoBERTa":[32],"baseline":[33],"model":[34],"fine-tuned":[35],"with":[36],"cross-entropy":[37],"by":[39],"about":[40,72],"0.02-2.29":[41],"percentage":[42,74,78,82,88],"points.":[43,89],"Thorough":[44],"tests":[45],"popular":[47],"datasets":[48],"using":[49],"our":[50],"indicate":[53],"steady":[55],"gain.":[56],"The":[57],"fewer":[58],"samples":[59],"in":[60],"training":[62],"dataset,":[63,69],"higher":[65],"gain-thus,":[66],"smallsized":[68],"it":[70],"is":[71],"0.71":[73],"points,":[75,79,83],"mediumsized-0.86":[77],"large-0.20":[81],"extra-large":[86],"0.04":[87]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
