{"id":"https://openalex.org/W2064466058","doi":"https://doi.org/10.1145/1055558.1055560","title":"The Lixto data extraction project","display_name":"The Lixto data extraction project","publication_year":2004,"publication_date":"2004-06-14","ids":{"openalex":"https://openalex.org/W2064466058","doi":"https://doi.org/10.1145/1055558.1055560","mag":"2064466058"},"language":"en","primary_location":{"id":"doi:10.1145/1055558.1055560","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1055558.1055560","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the twenty-third ACM SIGMOD-SIGACT-SIGART symposium on Principles of database systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://infoscience.epfl.ch/record/166879","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050151740","display_name":"Georg Gottlob","orcid":"https://orcid.org/0000-0002-2353-5230"},"institutions":[{"id":"https://openalex.org/I145847075","display_name":"TU Wien","ror":"https://ror.org/04d836q62","country_code":"AT","type":"education","lineage":["https://openalex.org/I145847075"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Georg Gottlob","raw_affiliation_strings":["DBAI, TU Wien, Austria"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"DBAI, TU Wien, Austria","institution_ids":["https://openalex.org/I145847075"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101903922","display_name":"Christoph Koch","orcid":"https://orcid.org/0000-0002-9130-7205"},"institutions":[{"id":"https://openalex.org/I145847075","display_name":"TU Wien","ror":"https://ror.org/04d836q62","country_code":"AT","type":"education","lineage":["https://openalex.org/I145847075"]}],"countries":["AT"],"is_corresponding":false,"raw_author_name":"Christoph Koch","raw_affiliation_strings":["DBAI, TU Wien, Austria"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"DBAI, TU Wien, Austria","institution_ids":["https://openalex.org/I145847075"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055923468","display_name":"Robert Baumgartner","orcid":"https://orcid.org/0000-0003-0899-4903"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Robert Baumgartner","raw_affiliation_strings":["Lixto Software GmbH, Austria"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Lixto Software GmbH, Austria","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111482934","display_name":"Marcus Herzog","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Marcus Herzog","raw_affiliation_strings":["Lixto Software GmbH, Austria"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Lixto Software GmbH, Austria","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5002811447","display_name":"Sergio Flesca","orcid":"https://orcid.org/0000-0002-4164-940X"},"institutions":[{"id":"https://openalex.org/I45204951","display_name":"University of Calabria","ror":"https://ror.org/02rc97e94","country_code":"IT","type":"education","lineage":["https://openalex.org/I45204951"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Sergio Flesca","raw_affiliation_strings":["D.E.I.S. - Universit\u00e0 della Calabria, Italy"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"D.E.I.S. - Universit\u00e0 della Calabria, Italy","institution_ids":["https://openalex.org/I45204951"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":28.2684,"has_fulltext":false,"cited_by_count":135,"citation_normalized_percentile":{"value":0.99447502,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"12"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.849309504032135},{"id":"https://openalex.org/keywords/datalog","display_name":"Datalog","score":0.7587592601776123},{"id":"https://openalex.org/keywords/web-service","display_name":"Web service","score":0.5162441730499268},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4841497540473938},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.4105623960494995},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.40873947739601135},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.3849579691886902},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.3751262128353119},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.3730494976043701}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.849309504032135},{"id":"https://openalex.org/C148230440","wikidata":"https://www.wikidata.org/wiki/Q1172264","display_name":"Datalog","level":2,"score":0.7587592601776123},{"id":"https://openalex.org/C35578498","wikidata":"https://www.wikidata.org/wiki/Q193424","display_name":"Web service","level":2,"score":0.5162441730499268},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4841497540473938},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4105623960494995},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.40873947739601135},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3849579691886902},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3751262128353119},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.3730494976043701}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/1055558.1055560","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1055558.1055560","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the twenty-third ACM SIGMOD-SIGACT-SIGART symposium on Principles of database systems","raw_type":"proceedings-article"},{"id":"pmh:oai:infoscience.epfl.ch:166879","is_oa":true,"landing_page_url":"http://infoscience.epfl.ch/record/166879","pdf_url":null,"source":{"id":"https://openalex.org/S4306400487","display_name":"Infoscience (Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"}],"best_oa_location":{"id":"pmh:oai:infoscience.epfl.ch:166879","is_oa":true,"landing_page_url":"http://infoscience.epfl.ch/record/166879","pdf_url":null,"source":{"id":"https://openalex.org/S4306400487","display_name":"Infoscience (Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure","score":0.5400000214576721}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W885045","https://openalex.org/W1497287285","https://openalex.org/W1499023380","https://openalex.org/W1509288318","https://openalex.org/W1527664624","https://openalex.org/W1540516609","https://openalex.org/W1553019137","https://openalex.org/W1558832481","https://openalex.org/W1569375617","https://openalex.org/W1816466119","https://openalex.org/W1816620374","https://openalex.org/W1821155018","https://openalex.org/W1927338256","https://openalex.org/W1969005071","https://openalex.org/W1969965298","https://openalex.org/W1970599921","https://openalex.org/W1991958955","https://openalex.org/W1995214664","https://openalex.org/W2035020702","https://openalex.org/W2046652577","https://openalex.org/W2052409393","https://openalex.org/W2052432706","https://openalex.org/W2053045757","https://openalex.org/W2070632872","https://openalex.org/W2072936489","https://openalex.org/W2073663206","https://openalex.org/W2074385562","https://openalex.org/W2079107830","https://openalex.org/W2103332648","https://openalex.org/W2114240051","https://openalex.org/W2115309483","https://openalex.org/W2148154295","https://openalex.org/W2148210463","https://openalex.org/W2153072229","https://openalex.org/W3044645048","https://openalex.org/W3194865684","https://openalex.org/W6633154970","https://openalex.org/W6800395879"],"related_works":["https://openalex.org/W2955734379","https://openalex.org/W2049216635","https://openalex.org/W3121709727","https://openalex.org/W1537382653","https://openalex.org/W2517279098","https://openalex.org/W2309621853","https://openalex.org/W2570392075","https://openalex.org/W1556735226","https://openalex.org/W2023906867","https://openalex.org/W2124217695"],"abstract_inverted_index":{"We":[0],"present":[1,47],"the":[2,30,38,65,70,79,83,89,105,127,132],"Lixto":[3,71,95,133,163],"project,":[4],"which":[5,61,136],"is":[6,62,147],"both":[7,75],"a":[8,15,41,76,148],"research":[9],"project":[10],"in":[11,36,69,94,151],"database":[12],"theory":[13],"and":[14,24,34,55,82,96,131,164],"commercial":[16],"enterprise":[17],"that":[18,112],"develops":[19],"Web":[20,25,145,156],"data":[21,142],"extraction":[22],"(wrapping)":[23],"service":[26],"definition":[27],"software.We":[28],"discuss":[29,102,159],"project's":[31],"main":[32],"motivations":[33],"ideas,":[35],"particular":[37],"use":[39],"of":[40,78,85,100,107,119,129,141,162],"logic-based":[42,120],"framework":[43],"for":[44,110,122,138,169],"wrapping.Then":[45,123],"we":[46,124,158],"theoretical":[48,117],"results":[49,73],"on":[50,56,104,155],"monadic":[51],"datalog":[52],"over":[53],"trees":[54,111],"Elog,":[57],"its":[58],"close":[59],"relative":[60],"used":[63],"as":[64],"internal":[66],"wrapper":[67,91],"language":[68],"system.These":[72],"include":[74],"characterization":[77],"expressive":[80],"power":[81],"complexity":[84,106],"these":[86],"languages.We":[87],"describe":[88],"visual":[90],"specification":[92],"process":[93],"various":[97],"practical":[98],"aspects":[99],"wrapping.We":[101],"work":[103],"query":[108],"languages":[109,121],"was":[113],"inseminated":[114],"by":[115],"our":[116],"study":[118],"return":[125],"to":[126,166],"practice":[128],"wrapping":[130],"Transformation":[134],"Server,":[135],"allows":[137],"streaming":[139],"integration":[140],"extracted":[143],"from":[144],"pages.This":[146],"natural":[149],"requirement":[150],"complex":[152],"services":[153],"based":[154],"wrapping.Finally,":[157],"industrial":[160],"applications":[161],"point":[165],"open":[167],"problems":[168],"future":[170],"study.":[171]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":3},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":5},{"year":2015,"cited_by_count":8},{"year":2014,"cited_by_count":7},{"year":2013,"cited_by_count":6},{"year":2012,"cited_by_count":6}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
