{"id":"https://openalex.org/W4295768363","doi":"https://doi.org/10.1109/fuzz-ieee55066.2022.9882843","title":"Multilingual Transformers for Product Matching \u2013 Experiments and a New Benchmark in Polish","display_name":"Multilingual Transformers for Product Matching \u2013 Experiments and a New Benchmark in Polish","publication_year":2022,"publication_date":"2022-07-18","ids":{"openalex":"https://openalex.org/W4295768363","doi":"https://doi.org/10.1109/fuzz-ieee55066.2022.9882843"},"language":"en","primary_location":{"id":"doi:10.1109/fuzz-ieee55066.2022.9882843","is_oa":false,"landing_page_url":"https://doi.org/10.1109/fuzz-ieee55066.2022.9882843","pdf_url":null,"source":{"id":"https://openalex.org/S4363608205","display_name":"2022 IEEE International Conference on Fuzzy Systems (FUZZ-IEEE)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Conference on Fuzzy Systems (FUZZ-IEEE)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5074522103","display_name":"Michal Mozdzonek","orcid":null},"institutions":[{"id":"https://openalex.org/I108403487","display_name":"Warsaw University of Technology","ror":"https://ror.org/00y0xnp53","country_code":"PL","type":"education","lineage":["https://openalex.org/I108403487"]}],"countries":["PL"],"is_corresponding":true,"raw_author_name":"Michal Mozdzonek","raw_affiliation_strings":["Warsaw University of Technology,Faculty of Mathematics and Information Science,Warsaw,Poland","Faculty of Mathematics and Information Science, Warsaw University of Technology, Warsaw, Poland"],"affiliations":[{"raw_affiliation_string":"Warsaw University of Technology,Faculty of Mathematics and Information Science,Warsaw,Poland","institution_ids":["https://openalex.org/I108403487"]},{"raw_affiliation_string":"Faculty of Mathematics and Information Science, Warsaw University of Technology, Warsaw, Poland","institution_ids":["https://openalex.org/I108403487"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031984813","display_name":"Anna Wr\u00f3blewska","orcid":"https://orcid.org/0000-0002-3407-7570"},"institutions":[{"id":"https://openalex.org/I108403487","display_name":"Warsaw University of Technology","ror":"https://ror.org/00y0xnp53","country_code":"PL","type":"education","lineage":["https://openalex.org/I108403487"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Anna Wroblewska","raw_affiliation_strings":["Warsaw University of Technology,Faculty of Mathematics and Information Science,Warsaw,Poland","Faculty of Mathematics and Information Science, Warsaw University of Technology, Warsaw, Poland"],"affiliations":[{"raw_affiliation_string":"Warsaw University of Technology,Faculty of Mathematics and Information Science,Warsaw,Poland","institution_ids":["https://openalex.org/I108403487"]},{"raw_affiliation_string":"Faculty of Mathematics and Information Science, Warsaw University of Technology, Warsaw, Poland","institution_ids":["https://openalex.org/I108403487"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000259467","display_name":"Sergiy Tkachuk","orcid":"https://orcid.org/0000-0002-3434-6320"},"institutions":[{"id":"https://openalex.org/I66083562","display_name":"Systems Research Institute","ror":"https://ror.org/0111cp837","country_code":"PL","type":"facility","lineage":["https://openalex.org/I66083562","https://openalex.org/I99542240"]},{"id":"https://openalex.org/I99542240","display_name":"Polish Academy of Sciences","ror":"https://ror.org/01dr6c206","country_code":"PL","type":"funder","lineage":["https://openalex.org/I99542240"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Sergiy Tkachuk","raw_affiliation_strings":["Polish Academy of Sciences,Systems Research Institute,Warsaw,Poland,01-447"],"affiliations":[{"raw_affiliation_string":"Polish Academy of Sciences,Systems Research Institute,Warsaw,Poland,01-447","institution_ids":["https://openalex.org/I66083562","https://openalex.org/I99542240"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082087108","display_name":"Szymon \u0141ukasik","orcid":"https://orcid.org/0000-0001-6716-610X"},"institutions":[{"id":"https://openalex.org/I99542240","display_name":"Polish Academy of Sciences","ror":"https://ror.org/01dr6c206","country_code":"PL","type":"funder","lineage":["https://openalex.org/I99542240"]},{"id":"https://openalex.org/I66083562","display_name":"Systems Research Institute","ror":"https://ror.org/0111cp837","country_code":"PL","type":"facility","lineage":["https://openalex.org/I66083562","https://openalex.org/I99542240"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Szymon Lukasik","raw_affiliation_strings":["Polish Academy of Sciences,Systems Research Institute,Warsaw,Poland,01-447"],"affiliations":[{"raw_affiliation_string":"Polish Academy of Sciences,Systems Research Institute,Warsaw,Poland,01-447","institution_ids":["https://openalex.org/I66083562","https://openalex.org/I99542240"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5074522103"],"corresponding_institution_ids":["https://openalex.org/I108403487"],"apc_list":null,"apc_paid":null,"fwci":0.5162,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.56243386,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9883000254631042,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9718999862670898,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7379552125930786},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.7192195653915405},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6838371753692627},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6224344372749329},{"id":"https://openalex.org/keywords/homogeneous","display_name":"Homogeneous","score":0.5913294553756714},{"id":"https://openalex.org/keywords/product","display_name":"Product (mathematics)","score":0.4698922634124756},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.45816662907600403},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4195171892642975},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.36728712916374207},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.35871386528015137},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3499992787837982},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11104682087898254},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07679244875907898},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.0741066038608551}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7379552125930786},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.7192195653915405},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6838371753692627},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6224344372749329},{"id":"https://openalex.org/C66882249","wikidata":"https://www.wikidata.org/wiki/Q169336","display_name":"Homogeneous","level":2,"score":0.5913294553756714},{"id":"https://openalex.org/C90673727","wikidata":"https://www.wikidata.org/wiki/Q901718","display_name":"Product (mathematics)","level":2,"score":0.4698922634124756},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.45816662907600403},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4195171892642975},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36728712916374207},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.35871386528015137},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3499992787837982},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11104682087898254},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07679244875907898},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0741066038608551},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/fuzz-ieee55066.2022.9882843","is_oa":false,"landing_page_url":"https://doi.org/10.1109/fuzz-ieee55066.2022.9882843","pdf_url":null,"source":{"id":"https://openalex.org/S4363608205","display_name":"2022 IEEE International Conference on Fuzzy Systems (FUZZ-IEEE)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Conference on Fuzzy Systems (FUZZ-IEEE)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.6000000238418579}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1990578345","https://openalex.org/W2035615211","https://openalex.org/W2171472464","https://openalex.org/W2546672044","https://openalex.org/W2763421725","https://openalex.org/W2798649495","https://openalex.org/W2896457183","https://openalex.org/W2908510526","https://openalex.org/W2945883855","https://openalex.org/W2965373594","https://openalex.org/W2970597249","https://openalex.org/W2978017171","https://openalex.org/W2979826702","https://openalex.org/W3014705052","https://openalex.org/W3035390927","https://openalex.org/W3080592272","https://openalex.org/W3123375411","https://openalex.org/W3201924938","https://openalex.org/W4221163653","https://openalex.org/W4230502578","https://openalex.org/W4242744113","https://openalex.org/W4286908394","https://openalex.org/W6745245109","https://openalex.org/W6755207826","https://openalex.org/W6757817989","https://openalex.org/W6763701032","https://openalex.org/W6766673545","https://openalex.org/W6768851824","https://openalex.org/W6801961408"],"related_works":["https://openalex.org/W1485630101","https://openalex.org/W2498017833","https://openalex.org/W112744582","https://openalex.org/W2357241418","https://openalex.org/W2081647779","https://openalex.org/W3033750096","https://openalex.org/W2789919619","https://openalex.org/W2086064646","https://openalex.org/W2081245617","https://openalex.org/W4234487584"],"abstract_inverted_index":{"Product":[0],"matching":[1,7,52,149],"corresponds":[2],"to":[3,95],"the":[4,50,96,107,138,143,156,159,166,171,178],"task":[5],"of":[6,27,158],"identical":[8],"products":[9],"across":[10],"different":[11],"data":[12,29],"sources.":[13],"It":[14,141],"typically":[15],"employs":[16],"available":[17],"product":[18,51,84,148],"features":[19,56],"which,":[20],"apart":[21],"from":[22,133],"being":[23],"multimodal,":[24],"i.e.,":[25],"comprised":[26],"various":[28],"types,":[30],"might":[31],"be":[32],"non-homogeneous":[33],"and":[34,60,67,79,103,125,174],"incomplete.":[35],"The":[36,86],"paper":[37],"shows":[38],"that":[39,90,120],"pre-trained,":[40],"multilingual":[41,65],"Transformer":[42],"models,":[43],"after":[44],"fine-tuning,":[45],"are":[46],"suitable":[47],"for":[48,82,137,147],"solving":[49],"problem":[53],"using":[54],"textual":[55],"both":[57],"in":[58,70,104,123,129,151],"English":[59,71],"Polish":[61,124,179],"languages.":[62],"We":[63],"tested":[64,99],"mBERT":[66,173],"XLM-RoBERTa":[68,175],"models":[69,92,176],"on":[72,100,127,177],"Web":[73],"Data":[74],"Commons":[75],"-":[76],"training":[77],"dataset":[78,116,146],"gold":[80],"standard":[81],"large-scale":[83],"matching.":[85],"obtained":[87,132,169],"results":[88,108,168],"show":[89],"these":[91],"perform":[93],"similarly":[94],"latest":[97],"solutions":[98],"this":[101],"set,":[102],"some":[105],"cases,":[106],"were":[109],"even":[110],"better.Additionally,":[111],"we":[112,163],"prepared":[113],"a":[114],"new":[115],"\u2013":[117,119],"ProductMatch.pl":[118],"is":[121,142],"entirely":[122],"based":[126],"offers":[128],"selected":[130],"categories":[131],"several":[134],"online":[135],"stores":[136],"research":[139],"purpose.":[140],"first":[144],"open":[145],"tasks":[150],"Polish,":[152],"which":[153],"allows":[154],"comparing":[155],"effectiveness":[157],"pre-trained":[160],"models.":[161],"Thus,":[162],"also":[164],"showed":[165],"baseline":[167],"by":[170],"fine-tuned":[172],"datasets.":[180]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2022-09-15T00:00:00"}
