{"id":"https://openalex.org/W3210119954","doi":"https://doi.org/10.1109/dsaa53316.2021.9564205","title":"An Overview on Supervised Semi-structured Data Classification","display_name":"An Overview on Supervised Semi-structured Data Classification","publication_year":2021,"publication_date":"2021-10-06","ids":{"openalex":"https://openalex.org/W3210119954","doi":"https://doi.org/10.1109/dsaa53316.2021.9564205","mag":"3210119954"},"language":"en","primary_location":{"id":"doi:10.1109/dsaa53316.2021.9564205","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dsaa53316.2021.9564205","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE 8th International Conference on Data Science and Advanced Analytics (DSAA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100448165","display_name":"Lijun Zhang","orcid":"https://orcid.org/0000-0002-9372-1004"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Lijun Zhang","raw_affiliation_strings":["School of Computer Science Key Laboratory of Big Data Storage and Management, MIIT, Northwestern Polytechnical University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science Key Laboratory of Big Data Storage and Management, MIIT, Northwestern Polytechnical University, Xi'an, China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100773104","display_name":"Ning Li","orcid":"https://orcid.org/0000-0001-7394-0640"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ning Li","raw_affiliation_strings":["School of Computer Science Key Laboratory of Big Data Storage and Management, MIIT, Northwestern Polytechnical University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science Key Laboratory of Big Data Storage and Management, MIIT, Northwestern Polytechnical University, Xi'an, China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5041615627","display_name":"Zhanhuai Li","orcid":"https://orcid.org/0009-0003-6936-5745"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhanhuai Li","raw_affiliation_strings":["School of Computer Science Key Laboratory of Big Data Storage and Management, MIIT, Northwestern Polytechnical University, Xi'an, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science Key Laboratory of Big Data Storage and Management, MIIT, Northwestern Polytechnical University, Xi'an, China","institution_ids":["https://openalex.org/I17145004"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100448165"],"corresponding_institution_ids":["https://openalex.org/I17145004"],"apc_list":null,"apc_paid":null,"fwci":0.4079,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.69483463,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"10"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.9864000082015991,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9850000143051147,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7909409403800964},{"id":"https://openalex.org/keywords/semi-structured-data","display_name":"Semi-structured data","score":0.6026020646095276},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5385258197784424},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.512262761592865},{"id":"https://openalex.org/keywords/data-classification","display_name":"Data classification","score":0.47753870487213135},{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.47437721490859985},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.4456329643726349},{"id":"https://openalex.org/keywords/data-set","display_name":"Data set","score":0.4173913896083832},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4148879647254944},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4141653776168823},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.328651487827301},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.15926042199134827},{"id":"https://openalex.org/keywords/relational-database","display_name":"Relational database","score":0.09548169374465942}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7909409403800964},{"id":"https://openalex.org/C40077939","wikidata":"https://www.wikidata.org/wiki/Q2336004","display_name":"Semi-structured data","level":3,"score":0.6026020646095276},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5385258197784424},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.512262761592865},{"id":"https://openalex.org/C2780724565","wikidata":"https://www.wikidata.org/wiki/Q5227256","display_name":"Data classification","level":2,"score":0.47753870487213135},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.47437721490859985},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.4456329643726349},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.4173913896083832},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4148879647254944},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4141653776168823},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.328651487827301},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.15926042199134827},{"id":"https://openalex.org/C5655090","wikidata":"https://www.wikidata.org/wiki/Q192588","display_name":"Relational database","level":2,"score":0.09548169374465942},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/dsaa53316.2021.9564205","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dsaa53316.2021.9564205","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE 8th International Conference on Data Science and Advanced Analytics (DSAA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4061782804","display_name":null,"funder_award_id":"61972317,61672432,61732014","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4688772643","display_name":null,"funder_award_id":"3102015JSJ0004","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":88,"referenced_works":["https://openalex.org/W46681494","https://openalex.org/W67881473","https://openalex.org/W135761774","https://openalex.org/W560079301","https://openalex.org/W1472662514","https://openalex.org/W1505155935","https://openalex.org/W1550206324","https://openalex.org/W1575842006","https://openalex.org/W1582862366","https://openalex.org/W1628571627","https://openalex.org/W1814912083","https://openalex.org/W1845035324","https://openalex.org/W1880452510","https://openalex.org/W1907380269","https://openalex.org/W1969572066","https://openalex.org/W1993717606","https://openalex.org/W2014364160","https://openalex.org/W2019797988","https://openalex.org/W2024428474","https://openalex.org/W2038721957","https://openalex.org/W2042184006","https://openalex.org/W2042961901","https://openalex.org/W2046341811","https://openalex.org/W2061022164","https://openalex.org/W2061354549","https://openalex.org/W2063198646","https://openalex.org/W2066619786","https://openalex.org/W2070046067","https://openalex.org/W2079629881","https://openalex.org/W2087609354","https://openalex.org/W2102247866","https://openalex.org/W2106740416","https://openalex.org/W2111072639","https://openalex.org/W2116020486","https://openalex.org/W2117849706","https://openalex.org/W2121971770","https://openalex.org/W2134222424","https://openalex.org/W2134603844","https://openalex.org/W2140190241","https://openalex.org/W2143774317","https://openalex.org/W2145059817","https://openalex.org/W2149684865","https://openalex.org/W2151983993","https://openalex.org/W2153010910","https://openalex.org/W2162955638","https://openalex.org/W2163360598","https://openalex.org/W2163598528","https://openalex.org/W2164539421","https://openalex.org/W2165612380","https://openalex.org/W2165967751","https://openalex.org/W2166276973","https://openalex.org/W2166362078","https://openalex.org/W2171551716","https://openalex.org/W2172000360","https://openalex.org/W2258183698","https://openalex.org/W2265846598","https://openalex.org/W2326036462","https://openalex.org/W2326364803","https://openalex.org/W2330219538","https://openalex.org/W2335920051","https://openalex.org/W2343796173","https://openalex.org/W2375256927","https://openalex.org/W2385636725","https://openalex.org/W2517878783","https://openalex.org/W2520027883","https://openalex.org/W2537675719","https://openalex.org/W2585848857","https://openalex.org/W2739996966","https://openalex.org/W2799027221","https://openalex.org/W2911379778","https://openalex.org/W2963912736","https://openalex.org/W2977642314","https://openalex.org/W2995732134","https://openalex.org/W2996149946","https://openalex.org/W3144577582","https://openalex.org/W4235505822","https://openalex.org/W4239024420","https://openalex.org/W6602738186","https://openalex.org/W6605505115","https://openalex.org/W6632865047","https://openalex.org/W6636741502","https://openalex.org/W6684019904","https://openalex.org/W6684073765","https://openalex.org/W6684327724","https://openalex.org/W6693505360","https://openalex.org/W6771792153","https://openalex.org/W6792917946","https://openalex.org/W7052822230"],"related_works":["https://openalex.org/W2380784125","https://openalex.org/W3004057759","https://openalex.org/W2059321491","https://openalex.org/W3128438030","https://openalex.org/W2383224923","https://openalex.org/W2365921522","https://openalex.org/W2527777278","https://openalex.org/W3216472057","https://openalex.org/W2010770252","https://openalex.org/W3210119954"],"abstract_inverted_index":{"Many":[0],"collaboratively":[1],"building":[2],"resources,":[3],"such":[4,26],"as":[5,27],"Wikipedia,":[6],"Weibo":[7],"and":[8,38,111,133,156,161],"Quora,":[9],"exist":[10],"in":[11,24,74,85,147],"the":[12,48,83,86],"form":[13],"of":[14,44,82,88,106,118,140],"semi-structured":[15,18,45,50,67,90,97,119,141],"data.":[16],"The":[17],"data":[19,28,30,32,34,46,51,68,91,98,120,142,170],"has":[20],"been":[21],"widely":[22],"used":[23],"areas":[25],"integration,":[29],"distribution,":[31],"storage,":[33],"management,":[35],"information":[36],"retrieval":[37],"knowledge":[39],"management.":[40],"For":[41],"large":[42,138],"volumes":[43],"on":[47,153,158],"Web,":[49],"classification":[52,69,99,143,171],"technique":[53],"can":[54],"group":[55],"them":[56],"into":[57],"different":[58,115],"categories":[59],"by":[60],"their":[61],"structure":[62,154,160],"and/or":[63],"content":[64],"information.":[65],"Supervised":[66],"plays":[70],"an":[71,80],"important":[72],"role":[73],"many":[75],"applications.":[76],"This":[77],"paper":[78],"provides":[79],"overview":[81],"literature":[84],"area":[87],"supervised":[89],"classification.":[92],"A":[93,137],"general":[94],"framework":[95],"for":[96,168],"is":[100,103],"presented,":[101],"which":[102],"mainly":[104,123],"composed":[105],"two":[107,150],"steps:":[108],"feature":[109,129,134],"extraction":[110],"model":[112,132],"building.":[113],"Several":[114],"representation":[116],"models":[117],"are":[121,145,172],"discussed,":[122],"including":[124],"rooted":[125],"labeled":[126],"tree":[127],"model,":[128],"vector":[130],"space":[131],"set":[135],"model.":[136],"selection":[139],"approaches":[144],"reviewed":[146],"detail":[148],"from":[149],"aspects:":[151],"based":[152,157],"only":[155],"both":[159],"content.":[162],"Finally,":[163],"several":[164],"future":[165],"research":[166],"directions":[167],"semistructured":[169],"presented.":[173]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
