{"id":"https://openalex.org/W2013626638","doi":"https://doi.org/10.1145/2361354.2361395","title":"Sift","display_name":"Sift","publication_year":2012,"publication_date":"2012-09-04","ids":{"openalex":"https://openalex.org/W2013626638","doi":"https://doi.org/10.1145/2361354.2361395","mag":"2013626638"},"language":"en","primary_location":{"id":"doi:10.1145/2361354.2361395","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2361354.2361395","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2012 ACM symposium on Document engineering","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068046597","display_name":"Matthias Geel","orcid":null},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":true,"raw_author_name":"Matthias Geel","raw_affiliation_strings":["ETH Zurich, Zurich, Switzerland"],"affiliations":[{"raw_affiliation_string":"ETH Zurich, Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041687430","display_name":"Timothy R. Church","orcid":"https://orcid.org/0000-0003-3292-5035"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Timothy Church","raw_affiliation_strings":["ETH Zurich, Zurich, Switzerland"],"affiliations":[{"raw_affiliation_string":"ETH Zurich, Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5051077736","display_name":"Moira C. Norrie","orcid":"https://orcid.org/0000-0002-6864-8554"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Moira C. Norrie","raw_affiliation_strings":["ETH Zurich, Zurich, Switzerland"],"affiliations":[{"raw_affiliation_string":"ETH Zurich, Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5068046597"],"corresponding_institution_ids":["https://openalex.org/I35440088"],"apc_list":null,"apc_paid":null,"fwci":1.4211,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.86783373,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"181","last_page":"190"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.991599977016449,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8540041446685791},{"id":"https://openalex.org/keywords/markup-language","display_name":"Markup language","score":0.7142093181610107},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.6680507063865662},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.6591110229492188},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5492092967033386},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.5362687706947327},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.49716952443122864},{"id":"https://openalex.org/keywords/semantic-web","display_name":"Semantic Web","score":0.4777992367744446},{"id":"https://openalex.org/keywords/semantic-web-stack","display_name":"Semantic Web Stack","score":0.46668514609336853},{"id":"https://openalex.org/keywords/data-web","display_name":"Data Web","score":0.4411822557449341},{"id":"https://openalex.org/keywords/html","display_name":"HTML","score":0.43174421787261963},{"id":"https://openalex.org/keywords/scale-invariant-feature-transform","display_name":"Scale-invariant feature transform","score":0.4109753668308258},{"id":"https://openalex.org/keywords/web-service","display_name":"Web service","score":0.34614187479019165},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.34561413526535034},{"id":"https://openalex.org/keywords/xml","display_name":"XML","score":0.2516505718231201},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.12267175316810608},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.11720937490463257}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8540041446685791},{"id":"https://openalex.org/C45874996","wikidata":"https://www.wikidata.org/wiki/Q37045","display_name":"Markup language","level":3,"score":0.7142093181610107},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.6680507063865662},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.6591110229492188},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5492092967033386},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.5362687706947327},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.49716952443122864},{"id":"https://openalex.org/C2129575","wikidata":"https://www.wikidata.org/wiki/Q54837","display_name":"Semantic Web","level":2,"score":0.4777992367744446},{"id":"https://openalex.org/C167379230","wikidata":"https://www.wikidata.org/wiki/Q1026884","display_name":"Semantic Web Stack","level":3,"score":0.46668514609336853},{"id":"https://openalex.org/C162005631","wikidata":"https://www.wikidata.org/wiki/Q54837","display_name":"Data Web","level":3,"score":0.4411822557449341},{"id":"https://openalex.org/C138708601","wikidata":"https://www.wikidata.org/wiki/Q8811","display_name":"HTML","level":3,"score":0.43174421787261963},{"id":"https://openalex.org/C61265191","wikidata":"https://www.wikidata.org/wiki/Q767770","display_name":"Scale-invariant feature transform","level":3,"score":0.4109753668308258},{"id":"https://openalex.org/C35578498","wikidata":"https://www.wikidata.org/wiki/Q193424","display_name":"Web service","level":2,"score":0.34614187479019165},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.34561413526535034},{"id":"https://openalex.org/C8797682","wikidata":"https://www.wikidata.org/wiki/Q2115","display_name":"XML","level":2,"score":0.2516505718231201},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.12267175316810608},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.11720937490463257},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2361354.2361395","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2361354.2361395","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2012 ACM symposium on Document engineering","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W59280393","https://openalex.org/W1977761086","https://openalex.org/W1989113549","https://openalex.org/W1994053659","https://openalex.org/W1996300450","https://openalex.org/W2005646337","https://openalex.org/W2026533915","https://openalex.org/W2027491471","https://openalex.org/W2046545050","https://openalex.org/W2078324391","https://openalex.org/W2090656662","https://openalex.org/W2092140407","https://openalex.org/W2108991785","https://openalex.org/W2129595335","https://openalex.org/W2138405339","https://openalex.org/W2143309843","https://openalex.org/W2149033849","https://openalex.org/W2151192680","https://openalex.org/W2406734837","https://openalex.org/W6714056772"],"related_works":["https://openalex.org/W2042562985","https://openalex.org/W183683573","https://openalex.org/W3021385460","https://openalex.org/W2139931245","https://openalex.org/W2069569467","https://openalex.org/W2349698472","https://openalex.org/W2038021528","https://openalex.org/W2048480329","https://openalex.org/W2363068772","https://openalex.org/W2183626957"],"abstract_inverted_index":{"Although":[0],"web":[1,103,106],"sites":[2],"have":[3],"started":[4],"to":[5,19,23,44,81],"embed":[6],"semantic":[7,99],"metadata":[8],"within":[9],"their":[10],"documents,":[11],"it":[12],"remains":[13],"a":[14,62,87,115],"challenge":[15],"for":[16],"non-technical":[17],"end-users":[18],"exploit":[20],"that":[21,41,54],"markup":[22,80],"extract":[24],"and":[25,50,59,86,105],"store":[26],"information":[27,47,55],"of":[28,75,79,90,118],"interest.":[29],"To":[30],"address":[31],"this":[32],"challenge,":[33],"we":[34],"show":[35],"how":[36,53],"tools":[37],"can":[38],"be":[39,57],"developed":[40],"allow":[42],"users":[43],"identify":[45],"extractable":[46],"while":[48],"browsing":[49],"then":[51],"control":[52],"should":[56],"extracted":[58],"stored":[60],"in":[61],"personal":[63],"library.":[64],"The":[65],"proposed":[66],"approach":[67],"is":[68,114],"based":[69],"on":[70],"an":[71],"extensible":[72],"framework":[73],"capable":[74],"using":[76],"different":[77],"kinds":[78],"aid":[82],"the":[83,98,110,119],"extraction":[84],"process":[85],"unique":[88],"fusion":[89],"several":[91],"well-established":[92],"techniques":[93],"from":[94],"areas":[95],"such":[96],"as":[97],"web,":[100],"data":[101],"warehousing,":[102],"scraping":[104],"feeds.":[107],"We":[108],"present":[109],"Sift":[111],"tool":[112],"which":[113],"proof-of-concept":[116],"implementation":[117],"approach.":[120]},"counts_by_year":[{"year":2018,"cited_by_count":1},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2016-06-24T00:00:00"}
