{"id":"https://openalex.org/W3119042974","doi":"https://doi.org/10.3390/make3010006","title":"Learning DOM Trees of Web Pages by Subpath Kernel and Detecting Fake e-Commerce Sites","display_name":"Learning DOM Trees of Web Pages by Subpath Kernel and Detecting Fake e-Commerce Sites","publication_year":2021,"publication_date":"2021-01-14","ids":{"openalex":"https://openalex.org/W3119042974","doi":"https://doi.org/10.3390/make3010006","mag":"3119042974"},"language":"en","primary_location":{"id":"doi:10.3390/make3010006","is_oa":true,"landing_page_url":"https://doi.org/10.3390/make3010006","pdf_url":"https://www.mdpi.com/2504-4990/3/1/6/pdf?version=1610947220","source":{"id":"https://openalex.org/S4210213891","display_name":"Machine Learning and Knowledge Extraction","issn_l":"2504-4990","issn":["2504-4990"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning and Knowledge Extraction","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2504-4990/3/1/6/pdf?version=1610947220","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5039423670","display_name":"Kilho Shin","orcid":"https://orcid.org/0000-0002-0425-8485"},"institutions":[{"id":"https://openalex.org/I45391821","display_name":"Gakushuin University","ror":"https://ror.org/037s2db26","country_code":"JP","type":"education","lineage":["https://openalex.org/I45391821"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Kilho Shin","raw_affiliation_strings":["Computer Centre, Gakushuin University, Tokyo 1718588, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Computer Centre, Gakushuin University, Tokyo 1718588, Japan","institution_ids":["https://openalex.org/I45391821"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012874263","display_name":"Taichi Ishikawa","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Taichi Ishikawa","raw_affiliation_strings":["Information Networking Institute, Carnegie Mellon University, Pittsburgh, PA 15213, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Information Networking Institute, Carnegie Mellon University, Pittsburgh, PA 15213, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106730954","display_name":"Yulu Liu","orcid":"https://orcid.org/0000-0002-0496-8962"},"institutions":[{"id":"https://openalex.org/I1301041018","display_name":"Rakuten (Japan)","ror":"https://ror.org/0098kke80","country_code":"JP","type":"company","lineage":["https://openalex.org/I1301041018"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yu-Lu Liu","raw_affiliation_strings":["Cyber Security Defense Department, Rakuten, Inc., Tokyo 1580094, Japan"],"raw_orcid":"https://orcid.org/0000-0002-0496-8962","affiliations":[{"raw_affiliation_string":"Cyber Security Defense Department, Rakuten, Inc., Tokyo 1580094, Japan","institution_ids":["https://openalex.org/I1301041018"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5029804415","display_name":"David Shepard","orcid":null},"institutions":[{"id":"https://openalex.org/I4210099074","display_name":"Evidation Health (United States)","ror":"https://ror.org/00vhpak23","country_code":"US","type":"company","lineage":["https://openalex.org/I4210099074"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David Lawrence Shepard","raw_affiliation_strings":["Data Engineering, Evidation Health, Inc., San Mateo, CA 94402, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Data Engineering, Evidation Health, Inc., San Mateo, CA 94402, USA","institution_ids":["https://openalex.org/I4210099074"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5039423670"],"corresponding_institution_ids":["https://openalex.org/I45391821"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":2.8439,"has_fulltext":false,"cited_by_count":10,"citation_normalized_percentile":{"value":0.91609019,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":97},"biblio":{"volume":"3","issue":"1","first_page":"95","last_page":"122"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10400","display_name":"Network Security and Intrusion Detection","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6922001838684082},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6863745450973511},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5926002860069275},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.5556669235229492},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.4601619243621826},{"id":"https://openalex.org/keywords/decision-tree","display_name":"Decision tree","score":0.4344102144241333},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.36574599146842957}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6922001838684082},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6863745450973511},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5926002860069275},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.5556669235229492},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.4601619243621826},{"id":"https://openalex.org/C84525736","wikidata":"https://www.wikidata.org/wiki/Q831366","display_name":"Decision tree","level":2,"score":0.4344102144241333},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.36574599146842957}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.3390/make3010006","is_oa":true,"landing_page_url":"https://doi.org/10.3390/make3010006","pdf_url":"https://www.mdpi.com/2504-4990/3/1/6/pdf?version=1610947220","source":{"id":"https://openalex.org/S4210213891","display_name":"Machine Learning and Knowledge Extraction","issn_l":"2504-4990","issn":["2504-4990"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning and Knowledge Extraction","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:bba9be53c572435db94ed7dc2e31d060","is_oa":true,"landing_page_url":"https://doaj.org/article/bba9be53c572435db94ed7dc2e31d060","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Machine Learning and Knowledge Extraction, Vol 3, Iss 1, Pp 95-122 (2021)","raw_type":"article"},{"id":"pmh:oai:mdpi.com:/2504-4990/3/1/6/","is_oa":true,"landing_page_url":"https://dx.doi.org/10.3390/make3010006","pdf_url":null,"source":{"id":"https://openalex.org/S4306400947","display_name":"MDPI (MDPI AG)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210097602","host_organization_name":"Multidisciplinary Digital Publishing Institute (Switzerland)","host_organization_lineage":["https://openalex.org/I4210097602"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Machine Learning and Knowledge Extraction; Volume 3; Issue 1; Pages: 95-122","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.3390/make3010006","is_oa":true,"landing_page_url":"https://doi.org/10.3390/make3010006","pdf_url":"https://www.mdpi.com/2504-4990/3/1/6/pdf?version=1610947220","source":{"id":"https://openalex.org/S4210213891","display_name":"Machine Learning and Knowledge Extraction","issn_l":"2504-4990","issn":["2504-4990"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Machine Learning and Knowledge Extraction","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.5600000023841858,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[{"id":"https://openalex.org/G4418453036","display_name":null,"funder_award_id":"17H00762","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"}],"funders":[{"id":"https://openalex.org/F4320334764","display_name":"Japan Society for the Promotion of Science","ror":"https://ror.org/00hhkn466"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3119042974.pdf"},"referenced_works_count":62,"referenced_works":["https://openalex.org/W179694669","https://openalex.org/W875938751","https://openalex.org/W1136330450","https://openalex.org/W1482333726","https://openalex.org/W1486401986","https://openalex.org/W1492949973","https://openalex.org/W1495789144","https://openalex.org/W1512788924","https://openalex.org/W1565746575","https://openalex.org/W1576213419","https://openalex.org/W1578613744","https://openalex.org/W1605903931","https://openalex.org/W1647671624","https://openalex.org/W1730588416","https://openalex.org/W1774857219","https://openalex.org/W1866266163","https://openalex.org/W1975009259","https://openalex.org/W1976373002","https://openalex.org/W1978478796","https://openalex.org/W1988068423","https://openalex.org/W2003219686","https://openalex.org/W2003889154","https://openalex.org/W2021791566","https://openalex.org/W2036714085","https://openalex.org/W2040750298","https://openalex.org/W2046953300","https://openalex.org/W2056708848","https://openalex.org/W2057294899","https://openalex.org/W2108957124","https://openalex.org/W2121817315","https://openalex.org/W2127713198","https://openalex.org/W2139177218","https://openalex.org/W2139565456","https://openalex.org/W2147828873","https://openalex.org/W2153635508","https://openalex.org/W2163509192","https://openalex.org/W2164422986","https://openalex.org/W2171643895","https://openalex.org/W2174789775","https://openalex.org/W2190286163","https://openalex.org/W2295731716","https://openalex.org/W2531819622","https://openalex.org/W2789523537","https://openalex.org/W2801883820","https://openalex.org/W2803142167","https://openalex.org/W2810769850","https://openalex.org/W2889101931","https://openalex.org/W2889851289","https://openalex.org/W2894350870","https://openalex.org/W2899428374","https://openalex.org/W2909737018","https://openalex.org/W2963792772","https://openalex.org/W3120421331","https://openalex.org/W3212909510","https://openalex.org/W4212877087","https://openalex.org/W6623915047","https://openalex.org/W6628852952","https://openalex.org/W6629057494","https://openalex.org/W6636334323","https://openalex.org/W6636915900","https://openalex.org/W6639242023","https://openalex.org/W6751406538"],"related_works":["https://openalex.org/W2090763504","https://openalex.org/W148178222","https://openalex.org/W4224922629","https://openalex.org/W4384470695","https://openalex.org/W3134840015","https://openalex.org/W4366979180","https://openalex.org/W2377198601","https://openalex.org/W2381980924","https://openalex.org/W2353774927","https://openalex.org/W2081026125"],"abstract_inverted_index":{"The":[0],"subpath":[1,169],"kernel":[2,35,170],"is":[3,40,105,118,144,231],"a":[4,31,125,145],"class":[5],"of":[6,21,33,97,198],"positive":[7],"definite":[8],"kernels":[9,82],"defined":[10],"over":[11],"trees,":[12],"which":[13],"has":[14,64,207],"the":[15,19,71,95,103,111,196,239],"following":[16],"advantages":[17],"for":[18,120,139,223],"purposes":[20],"classification,":[22],"regression":[23],"and":[24,150,220],"clustering:":[25],"it":[26],"can":[27,50],"be":[28,51,166],"incorporated":[29],"into":[30],"variety":[32],"powerful":[34],"machines":[36],"including":[37],"SVM;":[38],"It":[39,49],"invariant":[41],"whether":[42],"input":[43],"trees":[44],"are":[45,203],"ordered":[46],"or":[47],"unordered;":[48],"computed":[52],"by":[53],"significantly":[54],"fast":[55],"linear-time":[56],"algorithms;":[57],"And,":[58],"finally,":[59],"its":[60],"excellent":[61],"learning":[62,171],"performance":[63,174],"been":[65,154],"proven":[66],"through":[67,188],"intensive":[68],"experiments":[69],"in":[70,80],"literature.":[72],"In":[73],"this":[74],"paper,":[75],"we":[76,90,177],"leverage":[77],"recent":[78],"advances":[79],"tree":[81],"to":[83,94,107,156,165,183],"solve":[84],"real":[85,146],"problems.":[86,131],"As":[87],"an":[88],"example,":[89],"apply":[91],"our":[92,205],"method":[93,206],"problem":[96,104,147],"detecting":[98],"fake":[99,121,135],"e-commerce":[100,122,136,142],"sites.":[101],"Although":[102],"similar":[106],"phishing":[108],"site":[109,137],"detection,":[110],"fact":[112],"that":[113,148,192],"mimicking":[114],"existing":[115,160,185],"authentic":[116],"sites":[117,123],"harmful":[119],"marks":[124],"clear":[126],"difference":[127],"between":[128],"these":[129,173],"two":[130],"We":[132],"focus":[133],"on":[134],"detection":[138,186],"three":[140],"reasons:":[141],"fraud":[143],"companies":[149],"law":[151],"enforcement":[152],"have":[153],"cooperating":[155],"solve;":[157],"Inefficiency":[158],"hampers":[159],"approaches":[161],"because":[162],"datasets":[163],"tend":[164],"large,":[167],"while":[168],"overcomes":[172],"challenges;":[175],"And":[176],"offer":[178],"increased":[179],"resiliency":[180],"against":[181],"attempts":[182],"subvert":[184],"methods":[187],"incorporating":[189],"robust":[190],"features":[191],"adversaries":[193],"cannot":[194],"change:":[195],"DOM-trees":[197],"web-sites.":[199],"Our":[200],"real-world":[201],"results":[202],"remarkable:":[204],"exhibited":[208],"accuracy":[209,222,240],"as":[210,212],"high":[211],"0.998":[213],"when":[214],"training":[215,237],"SVM":[216],"with":[217,234],"1000":[218],"instances":[219],"evaluating":[221],"almost":[224],"7000":[225],"independent":[226],"instances.":[227],"Its":[228],"generalization":[229],"efficiency":[230],"also":[232],"excellent:":[233],"only":[235],"100":[236],"instances,":[238],"score":[241],"reached":[242],"0.996.":[243]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":4}],"updated_date":"2026-05-22T06:13:13.366637","created_date":"2025-10-10T00:00:00"}
