{"id":"https://openalex.org/W2145125025","doi":"https://doi.org/10.1109/cmpsac.2003.1245412","title":"A supervised visual wrapper generator for Web-data extraction","display_name":"A supervised visual wrapper generator for Web-data extraction","publication_year":2004,"publication_date":"2004-03-02","ids":{"openalex":"https://openalex.org/W2145125025","doi":"https://doi.org/10.1109/cmpsac.2003.1245412","mag":"2145125025"},"language":"en","primary_location":{"id":"doi:10.1109/cmpsac.2003.1245412","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cmpsac.2003.1245412","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings 27th Annual International Computer Software and Applications Conference. COMPAC 2003","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109258303","display_name":"Xiaofeng Meng","orcid":null},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xiaofeng Meng","raw_affiliation_strings":["School of Information, Renmin University of China, Beijing, China","School of Information, RenMin University of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Information, Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]},{"raw_affiliation_string":"School of Information, RenMin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091211890","display_name":"Haiyan Wang","orcid":"https://orcid.org/0000-0002-5204-967X"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haiyan Wang","raw_affiliation_strings":["School of Information, Renmin University of China, Beijing, China","School of Information, RenMin University of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Information, Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]},{"raw_affiliation_string":"School of Information, RenMin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069427811","display_name":"Dongdong Hu","orcid":null},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dongdong Hu","raw_affiliation_strings":["School of Information, Renmin University of China, Beijing, China","School of Information, RenMin University of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Information, Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]},{"raw_affiliation_string":"School of Information, RenMin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100369885","display_name":"Chen Li","orcid":"https://orcid.org/0000-0002-8784-8148"},"institutions":[{"id":"https://openalex.org/I204250578","display_name":"University of California, Irvine","ror":"https://ror.org/04gyf1771","country_code":"US","type":"education","lineage":["https://openalex.org/I204250578"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chen Li","raw_affiliation_strings":["School of Information and CS, University of California, Irvine, CA, USA","[University of California, Irvine]"],"affiliations":[{"raw_affiliation_string":"School of Information and CS, University of California, Irvine, CA, USA","institution_ids":["https://openalex.org/I204250578"]},{"raw_affiliation_string":"[University of California, Irvine]","institution_ids":["https://openalex.org/I204250578"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5109258303"],"corresponding_institution_ids":["https://openalex.org/I78988378"],"apc_list":null,"apc_paid":null,"fwci":3.1563,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.93114375,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":"17","issue":null,"first_page":"657","last_page":"662"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.9542999863624573,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9444000124931335,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8724207282066345},{"id":"https://openalex.org/keywords/schema","display_name":"Schema (genetic algorithms)","score":0.7645185589790344},{"id":"https://openalex.org/keywords/generator","display_name":"Generator (circuit theory)","score":0.5307400822639465},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4827272593975067},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.47537413239479065},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4657595455646515},{"id":"https://openalex.org/keywords/data-extraction","display_name":"Data extraction","score":0.46133410930633545},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.445840060710907},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.256594181060791}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8724207282066345},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.7645185589790344},{"id":"https://openalex.org/C2780992000","wikidata":"https://www.wikidata.org/wiki/Q17016113","display_name":"Generator (circuit theory)","level":3,"score":0.5307400822639465},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4827272593975067},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.47537413239479065},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4657595455646515},{"id":"https://openalex.org/C2777466982","wikidata":"https://www.wikidata.org/wiki/Q5227287","display_name":"Data extraction","level":3,"score":0.46133410930633545},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.445840060710907},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.256594181060791},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C2779473830","wikidata":"https://www.wikidata.org/wiki/Q1540899","display_name":"MEDLINE","level":2,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cmpsac.2003.1245412","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cmpsac.2003.1245412","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings 27th Annual International Computer Software and Applications Conference. COMPAC 2003","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W1489949474","https://openalex.org/W1498241032","https://openalex.org/W1553019137","https://openalex.org/W1602270052","https://openalex.org/W1921703248","https://openalex.org/W1927338256","https://openalex.org/W2026080185","https://openalex.org/W2097519514","https://openalex.org/W2136500370","https://openalex.org/W2147100344","https://openalex.org/W2148210463","https://openalex.org/W2165555615","https://openalex.org/W2177750903","https://openalex.org/W6629296869","https://openalex.org/W6633154970","https://openalex.org/W6635982700","https://openalex.org/W6640116194","https://openalex.org/W6674507653","https://openalex.org/W6681973738"],"related_works":["https://openalex.org/W2611741382","https://openalex.org/W3135843367","https://openalex.org/W2613685774","https://openalex.org/W3112355890","https://openalex.org/W4220741973","https://openalex.org/W2096382848","https://openalex.org/W2099278314","https://openalex.org/W36911888","https://openalex.org/W4385731361","https://openalex.org/W4280620143"],"abstract_inverted_index":{"Extracting":[0],"data":[1,48,77],"from":[2,55,78],"Web":[3],"pages":[4],"using":[5],"wrappers":[6],"is":[7],"a":[8,13,27,36,56],"fundamental":[9],"problem":[10],"arising":[11],"in":[12,94],"large":[14],"variety":[15],"of":[16,18,46,91,115],"applications":[17],"vast":[19],"practical":[20],"interest.":[21],"In":[22],"this":[23,95],"paper,":[24],"we":[25],"propose":[26],"novel":[28],"schema-guided":[29],"approach":[30,82],"to":[31,42,49,59,75,83,102],"wrapper":[32,84],"generation.":[33],"We":[34],"provide":[35],"user-friendly":[37],"interface":[38],"that":[39],"allows":[40],"users":[41],"define":[43],"the":[44,47,60,65,67,79,89,98,105,113],"schema":[45],"be":[50],"extracted,":[51],"and":[52],"specifies":[53],"mappings":[54],"HTML":[57],"page":[58],"target":[61],"schema.":[62],"Based":[63],"on":[64],"mappings,":[66],"system":[68],"can":[69,86],"automatically":[70],"generate":[71],"an":[72],"extraction":[73,107],"rule":[74],"extract":[76],"page.":[80],"Our":[81],"generation":[85],"significantly":[87],"reduce":[88],"work":[90],"human":[92],"beings":[93],"process.":[96],"And":[97],"user":[99],"never":[100],"has":[101],"deal":[103],"with":[104,112],"internal":[106],"rule,":[108],"or":[109],"even":[110],"familiarity":[111],"details":[114],"HTML.":[116]},"counts_by_year":[{"year":2014,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
