{"id":"https://openalex.org/W4313451499","doi":"https://doi.org/10.1109/iccr56254.2022.9995764","title":"Feature Encoding For High Cardinality Categorical Variables Using Entity Embeddings: A Case Study in Customs Fraud Detection","display_name":"Feature Encoding For High Cardinality Categorical Variables Using Entity Embeddings: A Case Study in Customs Fraud Detection","publication_year":2022,"publication_date":"2022-10-06","ids":{"openalex":"https://openalex.org/W4313451499","doi":"https://doi.org/10.1109/iccr56254.2022.9995764"},"language":"en","primary_location":{"id":"doi:10.1109/iccr56254.2022.9995764","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccr56254.2022.9995764","pdf_url":null,"source":{"id":"https://openalex.org/S4363608155","display_name":"2022 International Conference on Cyber Resilience (ICCR)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 International Conference on Cyber Resilience (ICCR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5069638212","display_name":"Eric Khoo Jiun Hooi","orcid":null},"institutions":[{"id":"https://openalex.org/I4576418","display_name":"University of Technology Malaysia","ror":"https://ror.org/026w31v75","country_code":"MY","type":"education","lineage":["https://openalex.org/I4576418"]}],"countries":["MY"],"is_corresponding":true,"raw_author_name":"Eric Khoo Jiun Hooi","raw_affiliation_strings":["Universiti Teknologi Malaysia,Faculty of Computing,Johor Bahru,Johor,Malaysia,81310"],"affiliations":[{"raw_affiliation_string":"Universiti Teknologi Malaysia,Faculty of Computing,Johor Bahru,Johor,Malaysia,81310","institution_ids":["https://openalex.org/I4576418"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062591167","display_name":"Anazida Zainal","orcid":"https://orcid.org/0000-0003-0022-3039"},"institutions":[{"id":"https://openalex.org/I4576418","display_name":"University of Technology Malaysia","ror":"https://ror.org/026w31v75","country_code":"MY","type":"education","lineage":["https://openalex.org/I4576418"]}],"countries":["MY"],"is_corresponding":false,"raw_author_name":"Anazida Zainal","raw_affiliation_strings":["Universiti Teknologi Malaysia,Faculty of Computing,Johor Bahru,Johor,Malaysia,81310"],"affiliations":[{"raw_affiliation_string":"Universiti Teknologi Malaysia,Faculty of Computing,Johor Bahru,Johor,Malaysia,81310","institution_ids":["https://openalex.org/I4576418"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090096252","display_name":"Mohamad Nizam Kassim","orcid":null},"institutions":[{"id":"https://openalex.org/I4210140655","display_name":"Jabatan Perkhidmatan Awam Malaysia","ror":"https://ror.org/04kpqhb39","country_code":"MY","type":"government","lineage":["https://openalex.org/I4210140655"]}],"countries":["MY"],"is_corresponding":false,"raw_author_name":"Mohamad Nizam Kassim","raw_affiliation_strings":["Bahagian Penyelidikan Strategik, National Anti-Financial Crime Center,Putrajaya,Malaysia","Bahagian Penyelidikan Strategik, National Anti-Financial Crime Center, Putrajaya, Malaysia"],"affiliations":[{"raw_affiliation_string":"Bahagian Penyelidikan Strategik, National Anti-Financial Crime Center,Putrajaya,Malaysia","institution_ids":["https://openalex.org/I4210140655"]},{"raw_affiliation_string":"Bahagian Penyelidikan Strategik, National Anti-Financial Crime Center, Putrajaya, Malaysia","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032814384","display_name":"Zaily Ayub","orcid":null},"institutions":[{"id":"https://openalex.org/I4210133774","display_name":"Department of Statistics Malaysia","ror":"https://ror.org/03ztw9d82","country_code":"MY","type":"government","lineage":["https://openalex.org/I4210133774"]}],"countries":["MY"],"is_corresponding":false,"raw_author_name":"Zaily Ayub","raw_affiliation_strings":["Technical Services Division, Valuation Management Branch,Royal Malaysian Customs Department,Putrajaya,Malaysia","Royal Malaysian Customs Department, Technical Services Division, Valuation Management Branch, Putrajaya, Malaysia"],"affiliations":[{"raw_affiliation_string":"Technical Services Division, Valuation Management Branch,Royal Malaysian Customs Department,Putrajaya,Malaysia","institution_ids":["https://openalex.org/I4210133774"]},{"raw_affiliation_string":"Royal Malaysian Customs Department, Technical Services Division, Valuation Management Branch, Putrajaya, Malaysia","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5069638212"],"corresponding_institution_ids":["https://openalex.org/I4576418"],"apc_list":null,"apc_paid":null,"fwci":0.4158,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.59938591,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9747999906539917,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11512","display_name":"Anomaly Detection Techniques and Applications","score":0.9666000008583069,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/categorical-variable","display_name":"Categorical variable","score":0.8630826473236084},{"id":"https://openalex.org/keywords/cardinality","display_name":"Cardinality (data modeling)","score":0.8545758128166199},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.7732197046279907},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.737293004989624},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6874658465385437},{"id":"https://openalex.org/keywords/government","display_name":"Government (linguistics)","score":0.6037867069244385},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.589296817779541},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.537145733833313},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5068367123603821},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.4875490069389343},{"id":"https://openalex.org/keywords/feature-vector","display_name":"Feature vector","score":0.47421884536743164},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4588194191455841},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3611530661582947},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.356983482837677},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3338986933231354}],"concepts":[{"id":"https://openalex.org/C5274069","wikidata":"https://www.wikidata.org/wiki/Q2285707","display_name":"Categorical variable","level":2,"score":0.8630826473236084},{"id":"https://openalex.org/C87117476","wikidata":"https://www.wikidata.org/wiki/Q362383","display_name":"Cardinality (data modeling)","level":2,"score":0.8545758128166199},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.7732197046279907},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.737293004989624},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6874658465385437},{"id":"https://openalex.org/C2778137410","wikidata":"https://www.wikidata.org/wiki/Q2732820","display_name":"Government (linguistics)","level":2,"score":0.6037867069244385},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.589296817779541},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.537145733833313},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5068367123603821},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.4875490069389343},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.47421884536743164},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4588194191455841},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3611530661582947},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.356983482837677},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3338986933231354},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iccr56254.2022.9995764","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iccr56254.2022.9995764","pdf_url":null,"source":{"id":"https://openalex.org/S4363608155","display_name":"2022 International Conference on Cyber Resilience (ICCR)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 International Conference on Cyber Resilience (ICCR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.6200000047683716,"display_name":"Peace, Justice and strong institutions"}],"awards":[{"id":"https://openalex.org/G5807388589","display_name":null,"funder_award_id":"Q.J130000.4351.09G61","funder_id":"https://openalex.org/F4320323300","funder_display_name":"Universiti Teknologi Malaysia"}],"funders":[{"id":"https://openalex.org/F4320323300","display_name":"Universiti Teknologi Malaysia","ror":"https://ror.org/026w31v75"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W2096945460","https://openalex.org/W2104167780","https://openalex.org/W2135835855","https://openalex.org/W2157444450","https://openalex.org/W2464785945","https://openalex.org/W2893777921","https://openalex.org/W2906554279","https://openalex.org/W2940640769","https://openalex.org/W2945060583","https://openalex.org/W2989258287","https://openalex.org/W2998681424","https://openalex.org/W3010957669","https://openalex.org/W3014479773","https://openalex.org/W3080511606","https://openalex.org/W3105965470","https://openalex.org/W3122491347","https://openalex.org/W3169599570","https://openalex.org/W3176957414","https://openalex.org/W3189173701","https://openalex.org/W3211171742","https://openalex.org/W3214804357","https://openalex.org/W4205532559","https://openalex.org/W4396807510","https://openalex.org/W6664464457","https://openalex.org/W6683161245","https://openalex.org/W6719736774","https://openalex.org/W6764439227","https://openalex.org/W6797960062","https://openalex.org/W6866448437"],"related_works":["https://openalex.org/W2002177687","https://openalex.org/W4386799044","https://openalex.org/W2773208253","https://openalex.org/W2560646951","https://openalex.org/W4297454206","https://openalex.org/W3114793362","https://openalex.org/W4229333355","https://openalex.org/W4390437797","https://openalex.org/W4312820300","https://openalex.org/W2400708317"],"abstract_inverted_index":{"Customs":[0,72],"authorities":[1],"nowadays":[2],"are":[3,97],"pressurized":[4],"by":[5,68,116],"the":[6,21,32,37,59,69,77,80,85,90,121,132],"increasing":[7],"levels":[8],"of":[9,54,79,84,92,100,120,131],"international":[10],"trade":[11,35],"and":[12,141,149],"insufficient":[13],"resources":[14,30],"to":[15,26,154],"perform":[16],"physical":[17],"check":[18],"on":[19,31,58],"all":[20],"trades.":[22],"Government":[23],"therefore":[24],"aims":[25],"focus":[27],"their":[28],"limited":[29],"highly":[33],"suspicious":[34,45],"with":[36,64],"help":[38],"from":[39],"data":[40],"mining":[41],"especially":[42],"in":[43,103,112,124,143],"detecting":[44],"trade.":[46],"In":[47],"customs":[48],"fraud":[49],"detection":[50],"system,":[51],"a":[52,98,110],"total":[53],"38,173":[55],"records":[56],"focusing":[57],"Harmonized":[60],"System":[61],"code":[62],"starting":[63],"4011":[65],"that":[66],"shared":[67],"Royal":[70],"Malaysian":[71],"Department":[73],"was":[74,127],"analyzed.":[75],"From":[76],"analysis":[78],"datasets":[81],"provided,":[82],"one":[83],"main":[86],"difficulties":[87],"faced":[88],"is":[89],"issue":[91,115],"high":[93],"cardinality":[94],"where":[95],"there":[96],"lot":[99],"unique":[101,122],"values":[102,123],"single":[104],"feature.":[105],"Thus,":[106],"Entity":[107],"Embeddings":[108],"as":[109],"method":[111,136],"solving":[113],"this":[114],"providing":[117],"better":[118],"representation":[119],"lower":[125],"dimensions":[126],"introduced.":[128],"The":[129],"result":[130],"proposed":[133],"feature":[134,156],"encoding":[135,157],"can":[137],"achieve":[138],"higher":[139],"AUC-ROC":[140],"F1-Score":[142],"Logistic":[144],"Regression,":[145],"Support":[146],"Vector":[147],"Machine,":[148],"Neural":[150],"Network":[151],"models":[152],"compared":[153],"other":[155],"techniques.":[158]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
