{"id":"https://openalex.org/W2126909413","doi":"https://doi.org/10.3390/info5040634","title":"Deep Web Search Interface Identification: A Semi-Supervised Ensemble Approach","display_name":"Deep Web Search Interface Identification: A Semi-Supervised Ensemble Approach","publication_year":2014,"publication_date":"2014-12-01","ids":{"openalex":"https://openalex.org/W2126909413","doi":"https://doi.org/10.3390/info5040634","mag":"2126909413"},"language":"en","primary_location":{"id":"doi:10.3390/info5040634","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info5040634","pdf_url":"https://www.mdpi.com/2078-2489/5/4/634/pdf?version=1417502006","source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2078-2489/5/4/634/pdf?version=1417502006","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5035053697","display_name":"Hong Wang","orcid":"https://orcid.org/0000-0002-6938-9507"},"institutions":[{"id":"https://openalex.org/I139660479","display_name":"Central South University","ror":"https://ror.org/00f1zfq44","country_code":"CN","type":"education","lineage":["https://openalex.org/I139660479"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hong Wang","raw_affiliation_strings":["School of Mathematics & Statistics, Central South University, Changsha 410075, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Mathematics & Statistics, Central South University, Changsha 410075, China","institution_ids":["https://openalex.org/I139660479"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100763749","display_name":"Qing\u2010Song Xu","orcid":"https://orcid.org/0000-0002-1617-9581"},"institutions":[{"id":"https://openalex.org/I139660479","display_name":"Central South University","ror":"https://ror.org/00f1zfq44","country_code":"CN","type":"education","lineage":["https://openalex.org/I139660479"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingsong Xu","raw_affiliation_strings":["School of Mathematics & Statistics, Central South University, Changsha 410075, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Mathematics & Statistics, Central South University, Changsha 410075, China","institution_ids":["https://openalex.org/I139660479"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5050510965","display_name":"Lifeng Zhou","orcid":"https://orcid.org/0000-0001-7038-0967"},"institutions":[{"id":"https://openalex.org/I139660479","display_name":"Central South University","ror":"https://ror.org/00f1zfq44","country_code":"CN","type":"education","lineage":["https://openalex.org/I139660479"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Lifeng Zhou","raw_affiliation_strings":["School of Mathematics & Statistics, Central South University, Changsha 410075, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Mathematics & Statistics, Central South University, Changsha 410075, China","institution_ids":["https://openalex.org/I139660479"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5050510965"],"corresponding_institution_ids":["https://openalex.org/I139660479"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1400,"currency":"CHF","value_usd":1515},"fwci":0.8199,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.83470348,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"5","issue":"4","first_page":"634","last_page":"651"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11644","display_name":"Spam and Phishing Detection","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9921000003814697,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7908028960227966},{"id":"https://openalex.org/keywords/interface","display_name":"Interface (matter)","score":0.6080037355422974},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.600846529006958},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5664106607437134},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5588847398757935},{"id":"https://openalex.org/keywords/html","display_name":"HTML","score":0.558584988117218},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5387488603591919},{"id":"https://openalex.org/keywords/supervised-learning","display_name":"Supervised learning","score":0.5043603181838989},{"id":"https://openalex.org/keywords/ensemble-learning","display_name":"Ensemble learning","score":0.4506238102912903},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4415890574455261},{"id":"https://openalex.org/keywords/labeled-data","display_name":"Labeled data","score":0.4262255132198334},{"id":"https://openalex.org/keywords/semi-supervised-learning","display_name":"Semi-supervised learning","score":0.4156688451766968},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.39964550733566284},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3505989909172058},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.3118593692779541},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.1519717574119568}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7908028960227966},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.6080037355422974},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.600846529006958},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5664106607437134},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5588847398757935},{"id":"https://openalex.org/C138708601","wikidata":"https://www.wikidata.org/wiki/Q8811","display_name":"HTML","level":3,"score":0.558584988117218},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5387488603591919},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.5043603181838989},{"id":"https://openalex.org/C45942800","wikidata":"https://www.wikidata.org/wiki/Q245652","display_name":"Ensemble learning","level":2,"score":0.4506238102912903},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4415890574455261},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.4262255132198334},{"id":"https://openalex.org/C58973888","wikidata":"https://www.wikidata.org/wiki/Q1041418","display_name":"Semi-supervised learning","level":2,"score":0.4156688451766968},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.39964550733566284},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3505989909172058},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.3118593692779541},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.1519717574119568},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C129307140","wikidata":"https://www.wikidata.org/wiki/Q6795880","display_name":"Maximum bubble pressure method","level":3,"score":0.0},{"id":"https://openalex.org/C157915830","wikidata":"https://www.wikidata.org/wiki/Q2928001","display_name":"Bubble","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.3390/info5040634","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info5040634","pdf_url":"https://www.mdpi.com/2078-2489/5/4/634/pdf?version=1417502006","source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:aef6e4cf9235458e93e523c84db8699d","is_oa":true,"landing_page_url":"https://doaj.org/article/aef6e4cf9235458e93e523c84db8699d","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Information, Vol 5, Iss 4, Pp 634-651 (2014)","raw_type":"article"},{"id":"pmh:oai:mdpi.com:/2078-2489/5/4/634/","is_oa":true,"landing_page_url":"http://dx.doi.org/10.3390/info5040634","pdf_url":null,"source":{"id":"https://openalex.org/S4306400947","display_name":"MDPI (MDPI AG)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210097602","host_organization_name":"Multidisciplinary Digital Publishing Institute (Switzerland)","host_organization_lineage":["https://openalex.org/I4210097602"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Information","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.3390/info5040634","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info5040634","pdf_url":"https://www.mdpi.com/2078-2489/5/4/634/pdf?version=1417502006","source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.5600000023841858}],"awards":[{"id":"https://openalex.org/G1381552605","display_name":null,"funder_award_id":"61003233","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6955854771","display_name":null,"funder_award_id":"11271374","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2126909413.pdf","grobid_xml":"https://content.openalex.org/works/W2126909413.grobid-xml"},"referenced_works_count":46,"referenced_works":["https://openalex.org/W1479807131","https://openalex.org/W1498286998","https://openalex.org/W1505303261","https://openalex.org/W1565746575","https://openalex.org/W1720548684","https://openalex.org/W1906766771","https://openalex.org/W1969831559","https://openalex.org/W1973625941","https://openalex.org/W1983597490","https://openalex.org/W1988790447","https://openalex.org/W1998565453","https://openalex.org/W2003316857","https://openalex.org/W2030200470","https://openalex.org/W2045228650","https://openalex.org/W2048679005","https://openalex.org/W2061119986","https://openalex.org/W2070493638","https://openalex.org/W2071113239","https://openalex.org/W2079359777","https://openalex.org/W2085443648","https://openalex.org/W2094930182","https://openalex.org/W2096314212","https://openalex.org/W2112076978","https://openalex.org/W2113242816","https://openalex.org/W2130760429","https://openalex.org/W2131552526","https://openalex.org/W2134473739","https://openalex.org/W2136504847","https://openalex.org/W2137263761","https://openalex.org/W2145102654","https://openalex.org/W2145376937","https://openalex.org/W2148738951","https://openalex.org/W2151306141","https://openalex.org/W2153818524","https://openalex.org/W2158698691","https://openalex.org/W2164896748","https://openalex.org/W2167917621","https://openalex.org/W2911964244","https://openalex.org/W2912934387","https://openalex.org/W4212883601","https://openalex.org/W6629736100","https://openalex.org/W6630072960","https://openalex.org/W6674635078","https://openalex.org/W6676769703","https://openalex.org/W6681291871","https://openalex.org/W6682597120"],"related_works":["https://openalex.org/W4312414840","https://openalex.org/W34092691","https://openalex.org/W2794908468","https://openalex.org/W2531570999","https://openalex.org/W4206276646","https://openalex.org/W2943467239","https://openalex.org/W1571801203","https://openalex.org/W101422005","https://openalex.org/W192740413","https://openalex.org/W3004135598"],"abstract_inverted_index":{"To":[0],"surface":[1],"the":[2,65,102,110,128,131],"Deep":[3],"Web,":[4],"one":[5],"crucial":[6],"task":[7],"is":[8],"to":[9,44,58,74,78,99],"predict":[10],"whether":[11],"a":[12,17,86],"given":[13],"web":[14],"page":[15],"has":[16],"search":[18,80,103],"interface":[19,104],"(searchable":[20],"HyperText":[21],"Markup":[22],"Language":[23],"(HTML)":[24],"form)":[25],"or":[26],"not.":[27],"Previous":[28],"studies":[29],"have":[30],"focused":[31],"on":[32],"supervised":[33],"classification":[34],"with":[35,101],"labeled":[36,39,70,118],"examples.":[37],"However,":[38],"data":[40,73,126],"are":[41,54],"scarce,":[42],"hard":[43],"get":[45],"and":[46,56,71,96],"requires":[47],"tediousmanual":[48],"work,":[49],"while":[50],"unlabeled":[51,72,125],"HTML":[52],"forms":[53],"abundant":[55],"easy":[57],"obtain.":[59],"In":[60],"this":[61],"research,":[62],"we":[63],"consider":[64],"plausibility":[66],"of":[67,130],"using":[68,92,116],"both":[69,93],"train":[75],"better":[76],"models":[77],"identify":[79],"interfaces":[81],"more":[82],"effectively.":[83],"We":[84,107,120],"present":[85],"semi-supervised":[87],"co-training":[88],"ensemble":[89],"learning":[90],"approach":[91],"neural":[94],"networks":[95],"decision":[97],"trees":[98],"deal":[100],"identification":[105],"problem.":[106],"show":[108,122],"that":[109,123],"proposed":[111,132],"model":[112],"outperforms":[113],"previous":[114],"methods":[115],"only":[117],"data.":[119],"also":[121],"adding":[124],"improves":[127],"effectiveness":[129],"model.":[133]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2015,"cited_by_count":1}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-10-10T00:00:00"}
