{"id":"https://openalex.org/W2138396769","doi":"https://doi.org/10.1109/icsmc.2010.5642466","title":"Deep web data extraction","display_name":"Deep web data extraction","publication_year":2010,"publication_date":"2010-10-01","ids":{"openalex":"https://openalex.org/W2138396769","doi":"https://doi.org/10.1109/icsmc.2010.5642466","mag":"2138396769"},"language":"en","primary_location":{"id":"doi:10.1109/icsmc.2010.5642466","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icsmc.2010.5642466","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2010 IEEE International Conference on Systems, Man and Cybernetics","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5076993574","display_name":"Jer Lang Hong","orcid":null},"institutions":[{"id":"https://openalex.org/I56590836","display_name":"Monash University","ror":"https://ror.org/02bfwt286","country_code":"AU","type":"education","lineage":["https://openalex.org/I56590836"]},{"id":"https://openalex.org/I11662577","display_name":"Monash University Malaysia","ror":"https://ror.org/00yncr324","country_code":"MY","type":"education","lineage":["https://openalex.org/I11662577"]}],"countries":["AU","MY"],"is_corresponding":true,"raw_author_name":"Jer Lang Hong","raw_affiliation_strings":["School of IT, Monash University, Malaysia","[School of IT, Monash University, Australia]"],"affiliations":[{"raw_affiliation_string":"School of IT, Monash University, Malaysia","institution_ids":["https://openalex.org/I11662577"]},{"raw_affiliation_string":"[School of IT, Monash University, Australia]","institution_ids":["https://openalex.org/I56590836"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5076993574"],"corresponding_institution_ids":["https://openalex.org/I11662577","https://openalex.org/I56590836"],"apc_list":null,"apc_paid":null,"fwci":7.6308,"has_fulltext":false,"cited_by_count":29,"citation_normalized_percentile":{"value":0.97183645,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"3420","last_page":"3427"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.9837999939918518,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13976","display_name":"Web visibility and informetrics","score":0.9696000218391418,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.841551661491394},{"id":"https://openalex.org/keywords/document-object-model","display_name":"Document Object Model","score":0.7656985521316528},{"id":"https://openalex.org/keywords/wordnet","display_name":"WordNet","score":0.719028890132904},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6760938167572021},{"id":"https://openalex.org/keywords/deep-web","display_name":"Deep Web","score":0.5957518815994263},{"id":"https://openalex.org/keywords/data-extraction","display_name":"Data extraction","score":0.553410530090332},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.49184632301330566},{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering (computer graphics)","score":0.4787306487560272},{"id":"https://openalex.org/keywords/tree","display_name":"Tree (set theory)","score":0.46801719069480896},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.46754685044288635},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4366757571697235},{"id":"https://openalex.org/keywords/lexical-database","display_name":"Lexical database","score":0.42877569794654846},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.4164714515209198},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.23562586307525635},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.2258930802345276}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.841551661491394},{"id":"https://openalex.org/C137922610","wikidata":"https://www.wikidata.org/wiki/Q2093","display_name":"Document Object Model","level":3,"score":0.7656985521316528},{"id":"https://openalex.org/C157659113","wikidata":"https://www.wikidata.org/wiki/Q533822","display_name":"WordNet","level":2,"score":0.719028890132904},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6760938167572021},{"id":"https://openalex.org/C46721378","wikidata":"https://www.wikidata.org/wiki/Q221989","display_name":"Deep Web","level":3,"score":0.5957518815994263},{"id":"https://openalex.org/C2777466982","wikidata":"https://www.wikidata.org/wiki/Q5227287","display_name":"Data extraction","level":3,"score":0.553410530090332},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.49184632301330566},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.4787306487560272},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.46801719069480896},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46754685044288635},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4366757571697235},{"id":"https://openalex.org/C2780403423","wikidata":"https://www.wikidata.org/wiki/Q6537700","display_name":"Lexical database","level":3,"score":0.42877569794654846},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.4164714515209198},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.23562586307525635},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.2258930802345276},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C2779473830","wikidata":"https://www.wikidata.org/wiki/Q1540899","display_name":"MEDLINE","level":2,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icsmc.2010.5642466","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icsmc.2010.5642466","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2010 IEEE International Conference on Systems, Man and Cybernetics","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1541624846","https://openalex.org/W1569415500","https://openalex.org/W1974550371","https://openalex.org/W1984486227","https://openalex.org/W2015551056","https://openalex.org/W2035302703","https://openalex.org/W2038721957","https://openalex.org/W2067416210","https://openalex.org/W2087739686","https://openalex.org/W2100935296","https://openalex.org/W2127563440","https://openalex.org/W2128341918","https://openalex.org/W2133669904","https://openalex.org/W2134126205","https://openalex.org/W2134907429","https://openalex.org/W2136480620","https://openalex.org/W2140887277","https://openalex.org/W2143309843","https://openalex.org/W2154872327","https://openalex.org/W2160189941","https://openalex.org/W2171364811","https://openalex.org/W2534712034","https://openalex.org/W4232071090","https://openalex.org/W4235505822","https://openalex.org/W6632252279","https://openalex.org/W6634008025","https://openalex.org/W6675339850","https://openalex.org/W6681340555","https://openalex.org/W6683516198"],"related_works":["https://openalex.org/W2163292279","https://openalex.org/W2040624659","https://openalex.org/W3196687212","https://openalex.org/W2749919855","https://openalex.org/W4241608514","https://openalex.org/W2321197640","https://openalex.org/W1209047547","https://openalex.org/W1515748455","https://openalex.org/W2134068523","https://openalex.org/W2207408097"],"abstract_inverted_index":{"Current":[0],"automatic":[1],"wrappers":[2],"using":[3,76],"DOM":[4],"tree":[5,33,55],"and":[6,54,134,148,162,183],"visual":[7,52,124],"properties":[8,53],"of":[9,32,86,105,151,169,190],"data":[10,40,87,107,111,116,138,152,191],"records":[11,41,88,192],"to":[12,28,71,101,132,186],"extract":[13,135,187],"the":[14,18,26,30,44,84,103,115,127,136,141,146,166,170],"required":[15],"information":[16],"from":[17,89,126,140],"deep":[19,45,90,142],"web":[20,46,91,143],"generally":[21],"have":[22],"limitations":[23],"such":[24,67],"as":[25],"inability":[27],"check":[29],"similarity":[31],"structures":[34],"accurately.":[35],"Our":[36],"study":[37],"shows":[38],"that":[39,157],"located":[42],"in":[43,63],"do":[47],"not":[48],"only":[49],"share":[50],"similar":[51],"structures,":[56],"but":[57],"they":[58],"are":[59,69,99],"also":[60],"related":[61],"semantically":[62],"their":[64],"contents.":[65],"As":[66],"we":[68,122],"able":[70,100,185],"propose":[72],"an":[73],"ontological":[74,97,175],"technique":[75,98,159],"existing":[77,167,174],"lexical":[78],"database":[79],"for":[80,83,110],"English":[81],"(WordNet)":[82],"extraction":[85,117],"pages.":[92],"Wrappers":[93],"designed":[94],"based":[95,176],"on":[96],"reduce":[102],"number":[104],"potential":[106],"regions":[108],"identified":[109],"extraction,":[112],"thus":[113],"improve":[114],"accuracy.":[118],"In":[119],"this":[120],"study,":[121],"use":[123],"cue":[125],"underlying":[128],"browser":[129],"rendering":[130],"engine":[131],"locate":[133],"relevant":[137],"region":[139],"by":[144],"measuring":[145],"text":[147],"image":[149],"sizes":[150],"records.":[153],"Experimental":[154],"results":[155],"show":[156],"our":[158,178],"is":[160,180,184],"robust":[161],"performs":[163],"better":[164],"than":[165],"state":[168],"art":[171],"wrappers.":[172],"Unlike":[173],"wrappers,":[177],"wrapper":[179],"domain":[181],"independent":[182],"wide":[188],"range":[189],"with":[193],"different":[194],"structures.":[195]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":5},{"year":2015,"cited_by_count":4},{"year":2014,"cited_by_count":3},{"year":2013,"cited_by_count":4},{"year":2012,"cited_by_count":6}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
