{"id":"https://openalex.org/W2102694093","doi":"https://doi.org/10.1145/1135777.1135859","title":"Interactive wrapper generation with minimal user effort","display_name":"Interactive wrapper generation with minimal user effort","publication_year":2006,"publication_date":"2006-05-23","ids":{"openalex":"https://openalex.org/W2102694093","doi":"https://doi.org/10.1145/1135777.1135859","mag":"2102694093"},"language":"en","primary_location":{"id":"doi:10.1145/1135777.1135859","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1135777.1135859","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 15th international conference on World Wide Web","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5060202357","display_name":"Utku Irmak","orcid":null},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Utku Irmak","raw_affiliation_strings":["Polytechnic University, Brooklyn, NY","Polytechnic University, Brooklyn, NY,#TAB#"],"affiliations":[{"raw_affiliation_string":"Polytechnic University, Brooklyn, NY","institution_ids":[]},{"raw_affiliation_string":"Polytechnic University, Brooklyn, NY,#TAB#","institution_ids":["https://openalex.org/I57206974"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5074323303","display_name":"Torsten Suel","orcid":"https://orcid.org/0000-0002-8324-980X"},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Torsten Suel","raw_affiliation_strings":["Polytechnic University, Brooklyn, NY","Polytechnic University, Brooklyn, NY,#TAB#"],"affiliations":[{"raw_affiliation_string":"Polytechnic University, Brooklyn, NY","institution_ids":[]},{"raw_affiliation_string":"Polytechnic University, Brooklyn, NY,#TAB#","institution_ids":["https://openalex.org/I57206974"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5060202357"],"corresponding_institution_ids":["https://openalex.org/I57206974"],"apc_list":null,"apc_paid":null,"fwci":19.7948,"has_fulltext":false,"cited_by_count":60,"citation_normalized_percentile":{"value":0.99057161,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"553","last_page":"563"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.9884999990463257,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.984499990940094,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7670177221298218},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.40192046761512756},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.34830695390701294}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7670177221298218},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.40192046761512756},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.34830695390701294}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1145/1135777.1135859","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1135777.1135859","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 15th international conference on World Wide Web","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.12.5550","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.12.5550","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://cis.poly.edu/~suel/papers/wrapper-poster.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.418.9118","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.418.9118","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://cis.poly.edu/~uirmak/publications/tr-cis-2005-02.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.74.933","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.74.933","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://cis.poly.edu/suel/papers/wrapper.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.839.8498","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.839.8498","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://read.pudn.com/downloads152/ebook/668247/Interactive%20wrapper%20generation%20with%20minimal%20user%20effort%202006.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure","score":0.49000000953674316}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W11658723","https://openalex.org/W1533169541","https://openalex.org/W1553019137","https://openalex.org/W1553229631","https://openalex.org/W1602270052","https://openalex.org/W1821155018","https://openalex.org/W1921703248","https://openalex.org/W1927338256","https://openalex.org/W1989993068","https://openalex.org/W1990117407","https://openalex.org/W2005646337","https://openalex.org/W2037504378","https://openalex.org/W2052409393","https://openalex.org/W2054658115","https://openalex.org/W2065568440","https://openalex.org/W2073308541","https://openalex.org/W2093559286","https://openalex.org/W2096806473","https://openalex.org/W2104086170","https://openalex.org/W2116493296","https://openalex.org/W2140327372","https://openalex.org/W2143309843","https://openalex.org/W2143349571","https://openalex.org/W2147100344","https://openalex.org/W2148210463","https://openalex.org/W2153072229","https://openalex.org/W2153752143","https://openalex.org/W2160196229","https://openalex.org/W2162340487","https://openalex.org/W2177750903","https://openalex.org/W2277957941","https://openalex.org/W2593864877","https://openalex.org/W2912161846","https://openalex.org/W2914515968","https://openalex.org/W3015882749","https://openalex.org/W4248809068","https://openalex.org/W4285719527","https://openalex.org/W6633154970","https://openalex.org/W6681973738","https://openalex.org/W6694665639"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2382290278","https://openalex.org/W2350741829","https://openalex.org/W2530322880","https://openalex.org/W1596801655","https://openalex.org/W2359140296"],"abstract_inverted_index":{"While":[0],"much":[1],"of":[2,18,38,45,57,84,107,139,148,166,185],"the":[3,6,43,92,108,137],"data":[4,31,59,72,125],"on":[5,26,32,42,123,157],"web":[7],"is":[8,13,134,154],"unstructured":[9],"in":[10,104,127],"nature,":[11],"there":[12],"also":[14],"a":[15,85,112,128,149,158,164,181],"significant":[16],"amount":[17,37,138,184],"embedded":[19],"structured":[20,58],"data,":[21],"such":[22,68],"as":[23,69],"product":[24],"information":[25],"e-commerce":[27],"sites":[28],"or":[29],"stock":[30],"financial":[33],"sites.":[34],"A":[35],"large":[36],"research":[39],"has":[40,73],"focused":[41],"problem":[44],"generating":[46],"wrappers,":[47],"i.e.,":[48],"software":[49],"tools":[50],"that":[51,119,153,174],"allow":[52],"easy":[53],"and":[54,62,163,168],"robust":[55],"extraction":[56,161,179],"from":[60,77],"text":[61],"HTML":[63],"sources.":[64],"In":[65],"many":[66,78],"applications,":[67],"comparison":[70],"shopping,":[71],"to":[74,135],"be":[75,121],"extracted":[76,109],"different":[79,124],"sources,":[80],"making":[81],"manual":[82],"coding":[83],"wrapper":[86,117],"for":[87,115,142],"each":[88],"source":[89],"impractical.":[90],"On":[91],"other":[93],"hand,":[94],"fully":[95],"automatic":[96],"approaches":[97],"are":[98],"often":[99],"not":[100],"reliable":[101,144,178],"enough,":[102],"resulting":[103],"low":[105],"quality":[106],"data.We":[110],"describe":[111],"complete":[113],"system":[114,176],"semi-automatic":[116],"generation":[118],"can":[120],"trained":[122],"sources":[126],"simple":[129],"interactive":[130],"manner.":[131],"Our":[132,171],"goal":[133],"minimize":[136],"user":[140,186],"effort":[141],"training":[143,151,167],"wrappers":[145],"through":[146],"design":[147],"suitable":[150],"interface":[152],"implemented":[155],"based":[156],"powerful":[159],"underlying":[160],"language":[162],"set":[165],"ranking":[169],"algorithms.":[170],"experiments":[172],"show":[173],"our":[175],"achieves":[177],"with":[180],"very":[182],"small":[183],"effort.":[187]},"counts_by_year":[{"year":2022,"cited_by_count":2},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":3},{"year":2015,"cited_by_count":9},{"year":2014,"cited_by_count":3},{"year":2013,"cited_by_count":3},{"year":2012,"cited_by_count":3}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
