{"id":"https://openalex.org/W7140291513","doi":"https://doi.org/10.48550/arxiv.2603.23172","title":"From Synthetic to Native: Benchmarking Multilingual Intent Classification in Logistics Customer Service","display_name":"From Synthetic to Native: Benchmarking Multilingual Intent Classification in Logistics Customer Service","publication_year":2026,"publication_date":"2026-03-24","ids":{"openalex":"https://openalex.org/W7140291513","doi":"https://doi.org/10.48550/arxiv.2603.23172"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.23172","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.23172","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.23172","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130578538","display_name":"Haoyu He","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"He, Haoyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130548852","display_name":"Jinyu Zhuang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhuang, Jinyu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011216835","display_name":"Haoran Chu","orcid":"https://orcid.org/0000-0001-5907-4897"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chu, Haoran","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130590144","display_name":"Shuhang Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Shuhang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130575556","display_name":"J","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"J","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Group, T AI","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Group, T AI","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130542977","display_name":"Hao Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Hao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5130554455","display_name":"Kunpeng Han","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Han, Kunpeng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.24629999697208405,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.24629999697208405,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.06589999794960022,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.06379999965429306,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.7522000074386597},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.6331999897956848},{"id":"https://openalex.org/keywords/taxonomy","display_name":"Taxonomy (biology)","score":0.448199987411499},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.40779998898506165},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.39570000767707825},{"id":"https://openalex.org/keywords/measure","display_name":"Measure (data warehouse)","score":0.3939000070095062},{"id":"https://openalex.org/keywords/service-quality","display_name":"Service quality","score":0.3569999933242798},{"id":"https://openalex.org/keywords/service","display_name":"Service (business)","score":0.3458999991416931}],"concepts":[{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.7522000074386597},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.737500011920929},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6331999897956848},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5105999708175659},{"id":"https://openalex.org/C58642233","wikidata":"https://www.wikidata.org/wiki/Q8269924","display_name":"Taxonomy (biology)","level":2,"score":0.448199987411499},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43880000710487366},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.40779998898506165},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.39570000767707825},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.3939000070095062},{"id":"https://openalex.org/C140781008","wikidata":"https://www.wikidata.org/wiki/Q1221081","display_name":"Service quality","level":3,"score":0.3569999933242798},{"id":"https://openalex.org/C2780378061","wikidata":"https://www.wikidata.org/wiki/Q25351891","display_name":"Service (business)","level":2,"score":0.3458999991416931},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3418999910354614},{"id":"https://openalex.org/C2780035574","wikidata":"https://www.wikidata.org/wiki/Q30081","display_name":"Multilingualism","level":2,"score":0.33970001339912415},{"id":"https://openalex.org/C96455323","wikidata":"https://www.wikidata.org/wiki/Q13955","display_name":"Arabic","level":2,"score":0.31130000948905945},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3057999908924103},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.29789999127388},{"id":"https://openalex.org/C2984334869","wikidata":"https://www.wikidata.org/wiki/Q1060653","display_name":"Customer service","level":3,"score":0.29670000076293945},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.29660001397132874},{"id":"https://openalex.org/C191511416","wikidata":"https://www.wikidata.org/wiki/Q999278","display_name":"Customer satisfaction","level":2,"score":0.2904999852180481},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.28929999470710754},{"id":"https://openalex.org/C35219183","wikidata":"https://www.wikidata.org/wiki/Q5146","display_name":"Portuguese","level":2,"score":0.28780001401901245},{"id":"https://openalex.org/C2781170535","wikidata":"https://www.wikidata.org/wiki/Q30587856","display_name":"Noisy data","level":2,"score":0.2874999940395355},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.27889999747276306},{"id":"https://openalex.org/C171041071","wikidata":"https://www.wikidata.org/wiki/Q36870","display_name":"First language","level":2,"score":0.27219998836517334},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2639999985694885},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.2581000030040741},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.2563999891281128}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.23172","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.23172","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.23172","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.23172","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.5671706199645996,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Multilingual":[0],"intent":[1,59,182],"classification":[2,60],"is":[3,35,90],"central":[4],"to":[5],"customer-service":[6,65],"systems":[7],"on":[8,31,164],"global":[9],"logistics":[10,64],"platforms,":[11],"where":[12],"models":[13,149],"must":[14],"process":[15],"noisy":[16,165],"user":[17,74],"queries":[18,75],"across":[19],"languages":[20,118],"and":[21,38,45,86,89,99,105,115,129,136,140,146,152,172],"hierarchical":[22,57,153],"label":[23],"spaces.":[24],"Yet":[25],"most":[26],"existing":[27],"multilingual":[28,58,142,181],"benchmarks":[29],"rely":[30],"machine-translated":[32,137],"text,":[33],"which":[34],"typically":[36],"cleaner":[37],"more":[39,179],"standardized":[40],"than":[41],"native":[42,135,166],"customer":[43],"requests":[44],"can":[46],"therefore":[47],"overestimate":[48,162],"real-world":[49],"robustness.":[50],"We":[51],"present":[52],"a":[53,93],"public":[54],"benchmark":[55,141],"for":[56,169,178],"constructed":[61],"from":[62,77],"real":[63,130],"logs.":[66],"The":[67],"dataset":[68],"contains":[69],"approximately":[70],"30K":[71],"de-identified,":[72],"stand-alone":[73],"curated":[76],"600K":[78],"historical":[79],"records":[80],"through":[81],"filtering,":[82],"LLM-assisted":[83],"quality":[84],"control,":[85],"human":[87],"verification,":[88],"organized":[91],"into":[92],"two-level":[94],"taxonomy":[95],"with":[96],"13":[97],"parent":[98],"17":[100],"leaf":[101],"intents.":[102],"English,":[103],"Spanish,":[104],"Arabic":[106],"are":[107],"included":[108],"as":[109],"seen":[110],"languages,":[111],"while":[112],"Indonesian,":[113],"Chinese,":[114],"additional":[116],"test-only":[117],"support":[119],"zero-shot":[120],"evaluation.":[121],"To":[122],"directly":[123],"measure":[124],"the":[125,176],"gap":[126],"between":[127],"synthetic":[128],"evaluation,":[131],"we":[132],"provide":[133],"paired":[134],"test":[138,159],"sets":[139,160],"encoders,":[143],"embedding":[144],"models,":[145],"small":[147],"language":[148],"under":[150],"flat":[151],"protocols.":[154],"Results":[155],"show":[156],"that":[157],"translated":[158],"substantially":[161],"performance":[163],"queries,":[167],"especially":[168],"long-tail":[170],"intents":[171],"cross-lingual":[173],"transfer,":[174],"underscoring":[175],"need":[177],"realistic":[180],"benchmarks.":[183]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-26T00:00:00"}
