{"id":"https://openalex.org/W2133434861","doi":"https://doi.org/10.1145/1242572.1242844","title":"U-REST","display_name":"U-REST","publication_year":2007,"publication_date":"2007-05-08","ids":{"openalex":"https://openalex.org/W2133434861","doi":"https://doi.org/10.1145/1242572.1242844","mag":"2133434861"},"language":"en","primary_location":{"id":"doi:10.1145/1242572.1242844","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1242572.1242844","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 16th international conference on World Wide Web","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101989369","display_name":"Yuan Shen","orcid":"https://orcid.org/0000-0002-2697-4239"},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yuan Kui Shen","raw_affiliation_strings":["MIT Computer Science and Artificial Intelligence Laboratory, Cambridge, MA","MIT, Computer Science and Artificial Intelligence Laboratory, Cambridge, MA"],"affiliations":[{"raw_affiliation_string":"MIT Computer Science and Artificial Intelligence Laboratory, Cambridge, MA","institution_ids":["https://openalex.org/I63966007"]},{"raw_affiliation_string":"MIT, Computer Science and Artificial Intelligence Laboratory, Cambridge, MA","institution_ids":["https://openalex.org/I63966007"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5028448267","display_name":"David R. Karger","orcid":"https://orcid.org/0000-0002-0024-5847"},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David R. Karger","raw_affiliation_strings":["MIT Computer Science and Artificial Intelligence Laboratory, Cambridge, MA","MIT, Computer Science and Artificial Intelligence Laboratory, Cambridge, MA"],"affiliations":[{"raw_affiliation_string":"MIT Computer Science and Artificial Intelligence Laboratory, Cambridge, MA","institution_ids":["https://openalex.org/I63966007"]},{"raw_affiliation_string":"MIT, Computer Science and Artificial Intelligence Laboratory, Cambridge, MA","institution_ids":["https://openalex.org/I63966007"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5101989369"],"corresponding_institution_ids":["https://openalex.org/I63966007"],"apc_list":null,"apc_paid":null,"fwci":0.7913,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.84506353,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1347","last_page":"1348"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9883000254631042,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12479","display_name":"Web Application Security Vulnerabilities","score":0.9842000007629395,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.806306004524231},{"id":"https://openalex.org/keywords/rest","display_name":"Rest (music)","score":0.7724379897117615},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.7408168315887451},{"id":"https://openalex.org/keywords/tuple","display_name":"Tuple","score":0.5640116930007935},{"id":"https://openalex.org/keywords/unsupervised-learning","display_name":"Unsupervised learning","score":0.5515640377998352},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.5347589254379272},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5330184102058411},{"id":"https://openalex.org/keywords/feature-engineering","display_name":"Feature engineering","score":0.48645326495170593},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4472115635871887},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4345625340938568},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.42643797397613525},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.4221956729888916},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.34760135412216187},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.13159701228141785},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.0677042305469513}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.806306004524231},{"id":"https://openalex.org/C77265313","wikidata":"https://www.wikidata.org/wiki/Q879844","display_name":"Rest (music)","level":2,"score":0.7724379897117615},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.7408168315887451},{"id":"https://openalex.org/C118930307","wikidata":"https://www.wikidata.org/wiki/Q600590","display_name":"Tuple","level":2,"score":0.5640116930007935},{"id":"https://openalex.org/C8038995","wikidata":"https://www.wikidata.org/wiki/Q1152135","display_name":"Unsupervised learning","level":2,"score":0.5515640377998352},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.5347589254379272},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5330184102058411},{"id":"https://openalex.org/C2778827112","wikidata":"https://www.wikidata.org/wiki/Q22245680","display_name":"Feature engineering","level":3,"score":0.48645326495170593},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4472115635871887},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4345625340938568},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.42643797397613525},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.4221956729888916},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.34760135412216187},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.13159701228141785},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0677042305469513},{"id":"https://openalex.org/C118615104","wikidata":"https://www.wikidata.org/wiki/Q121416","display_name":"Discrete mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C164705383","wikidata":"https://www.wikidata.org/wiki/Q10379","display_name":"Cardiology","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1242572.1242844","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1242572.1242844","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 16th international conference on World Wide Web","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":5,"referenced_works":["https://openalex.org/W2015551056","https://openalex.org/W2128341918","https://openalex.org/W2128836931","https://openalex.org/W2138405339","https://openalex.org/W2143309843"],"related_works":["https://openalex.org/W4388147713","https://openalex.org/W4245395944","https://openalex.org/W4205145096","https://openalex.org/W2977909229","https://openalex.org/W2143551613","https://openalex.org/W1979740464","https://openalex.org/W2359059303","https://openalex.org/W3194775706","https://openalex.org/W3110334367","https://openalex.org/W2143345456"],"abstract_inverted_index":{"In":[0,103],"this":[1],"paper,":[2],"we":[3,106],"describe":[4,26],"a":[5,35,66],"system":[6],"that":[7,25],"can":[8],"extract":[9],"recordstructures":[10],"from":[11,50],"web":[12],"pages":[13],"with":[14],"no":[15],"direct":[16],"human":[17],"supervision.Records":[18],"are":[19,81,95],"commonly":[20],"occurring":[21],"HTML-embedded":[22],"data":[23],"tuples":[24],"people,":[27],"offered":[28],"courses,":[29],"products,company":[30],"profiles,":[31],"etc.":[32],"We":[33,121],"present":[34],"simplified":[36],"frameworkfor":[37],"studying":[38],"the":[39,48,51,84,110],"problem":[40,60],"of":[41,61,93,100,112,116,131],"unsupervised":[42,62],"record":[43,63],"extraction.":[44],"one":[45],"which":[46],"separates":[47],"algorithms":[49],"feature":[52],"engineering.Our":[53],"system,":[54],"U-REST":[55,125],"formalizes":[56],"an":[57,113],"approach":[58],"tothe":[59],"extraction":[64,139],"using":[65],"simple":[67],"two-stage":[68],"machine":[69],"learning":[70],"framework.":[71],"The":[72,129],"first":[73],"stage":[74,86],"involves":[75,87],"clustering,":[76],"where":[77,89],"structurally":[78],"similar":[79],"regions":[80],"discovered,":[82],"and":[83,108],"second":[85],"classification,":[88],"discovered":[90],"groupings":[91],"(clusters":[92],"regions)":[94],"ranked":[96],"by":[97,123],"their":[98],"likelihood":[99],"being":[101],"records.":[102],"our":[104,132],"work,":[105],"describe,":[107],"summarize":[109],"results":[111,130],"extensive":[114],"survey":[115],"features":[117],"for":[118],"both":[119],"stages.":[120],"conclude":[122],"comparing":[124],"to":[126],"related":[127],"systems.":[128],"empirical":[133],"evaluation":[134],"show":[135],"encouraging":[136],"improvements":[137],"in":[138],"accuracy.":[140]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":1},{"year":2012,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2016-06-24T00:00:00"}
