{"id":"https://openalex.org/W2115243937","doi":"https://doi.org/10.1109/rcis.2009.5089286","title":"AgentMat: Framework for data scraping and semantization","display_name":"AgentMat: Framework for data scraping and semantization","publication_year":2009,"publication_date":"2009-04-01","ids":{"openalex":"https://openalex.org/W2115243937","doi":"https://doi.org/10.1109/rcis.2009.5089286","mag":"2115243937"},"language":"en","primary_location":{"id":"doi:10.1109/rcis.2009.5089286","is_oa":false,"landing_page_url":"https://doi.org/10.1109/rcis.2009.5089286","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2009 Third International Conference on Research Challenges in Information Science","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048471272","display_name":"Miloslav Beno","orcid":null},"institutions":[{"id":"https://openalex.org/I21250087","display_name":"Charles University","ror":"https://ror.org/024d6js02","country_code":"CZ","type":"education","lineage":["https://openalex.org/I21250087"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Miloslav Beno","raw_affiliation_strings":["Department of Software Engineering, Charles University in Prague, Czech Republic","Department of Software Engineering, Charles University in Prague, Czech Republic#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Software Engineering, Charles University in Prague, Czech Republic","institution_ids":["https://openalex.org/I21250087"]},{"raw_affiliation_string":"Department of Software Engineering, Charles University in Prague, Czech Republic#TAB#","institution_ids":["https://openalex.org/I21250087"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034566474","display_name":"Jakub M\u00ed\u0161ek","orcid":"https://orcid.org/0000-0002-0792-2054"},"institutions":[{"id":"https://openalex.org/I21250087","display_name":"Charles University","ror":"https://ror.org/024d6js02","country_code":"CZ","type":"education","lineage":["https://openalex.org/I21250087"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Jakub Misek","raw_affiliation_strings":["Department of Software Engineering, Charles University in Prague, Czech Republic","Department of Software Engineering, Charles University in Prague, Czech Republic#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Software Engineering, Charles University in Prague, Czech Republic","institution_ids":["https://openalex.org/I21250087"]},{"raw_affiliation_string":"Department of Software Engineering, Charles University in Prague, Czech Republic#TAB#","institution_ids":["https://openalex.org/I21250087"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5031305661","display_name":"Filip Zavoral","orcid":"https://orcid.org/0000-0003-3140-8538"},"institutions":[{"id":"https://openalex.org/I21250087","display_name":"Charles University","ror":"https://ror.org/024d6js02","country_code":"CZ","type":"education","lineage":["https://openalex.org/I21250087"]}],"countries":["CZ"],"is_corresponding":false,"raw_author_name":"Filip Zavoral","raw_affiliation_strings":["Department of Software Engineering, Charles University in Prague, Czech Republic","Department of Software Engineering, Charles University in Prague, Czech Republic#TAB#"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Software Engineering, Charles University in Prague, Czech Republic","institution_ids":["https://openalex.org/I21250087"]},{"raw_affiliation_string":"Department of Software Engineering, Charles University in Prague, Czech Republic#TAB#","institution_ids":["https://openalex.org/I21250087"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.3589,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.85172948,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"1","issue":null,"first_page":"225","last_page":"236"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10679","display_name":"Service-Oriented Architecture and Web Services","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8761630058288574},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.6802120208740234},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.6329209208488464},{"id":"https://openalex.org/keywords/static-web-page","display_name":"Static web page","score":0.6034006476402283},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5873873233795166},{"id":"https://openalex.org/keywords/data-web","display_name":"Data Web","score":0.5852224826812744},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.5455195903778076},{"id":"https://openalex.org/keywords/xml","display_name":"XML","score":0.5402886271476746},{"id":"https://openalex.org/keywords/semantic-web-stack","display_name":"Semantic Web Stack","score":0.5260239243507385},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5184297561645508},{"id":"https://openalex.org/keywords/web-modeling","display_name":"Web modeling","score":0.49647217988967896},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.46531084179878235},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.4546930193901062},{"id":"https://openalex.org/keywords/semantic-web","display_name":"Semantic Web","score":0.44501200318336487},{"id":"https://openalex.org/keywords/interface","display_name":"Interface (matter)","score":0.42683690786361694},{"id":"https://openalex.org/keywords/web-development","display_name":"Web development","score":0.36895573139190674}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8761630058288574},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.6802120208740234},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.6329209208488464},{"id":"https://openalex.org/C173576120","wikidata":"https://www.wikidata.org/wiki/Q2641220","display_name":"Static web page","level":4,"score":0.6034006476402283},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5873873233795166},{"id":"https://openalex.org/C162005631","wikidata":"https://www.wikidata.org/wiki/Q54837","display_name":"Data Web","level":3,"score":0.5852224826812744},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.5455195903778076},{"id":"https://openalex.org/C8797682","wikidata":"https://www.wikidata.org/wiki/Q2115","display_name":"XML","level":2,"score":0.5402886271476746},{"id":"https://openalex.org/C167379230","wikidata":"https://www.wikidata.org/wiki/Q1026884","display_name":"Semantic Web Stack","level":3,"score":0.5260239243507385},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5184297561645508},{"id":"https://openalex.org/C130436687","wikidata":"https://www.wikidata.org/wiki/Q7978591","display_name":"Web modeling","level":3,"score":0.49647217988967896},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.46531084179878235},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.4546930193901062},{"id":"https://openalex.org/C2129575","wikidata":"https://www.wikidata.org/wiki/Q54837","display_name":"Semantic Web","level":2,"score":0.44501200318336487},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.42683690786361694},{"id":"https://openalex.org/C79373723","wikidata":"https://www.wikidata.org/wiki/Q386275","display_name":"Web development","level":3,"score":0.36895573139190674},{"id":"https://openalex.org/C157915830","wikidata":"https://www.wikidata.org/wiki/Q2928001","display_name":"Bubble","level":2,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C129307140","wikidata":"https://www.wikidata.org/wiki/Q6795880","display_name":"Maximum bubble pressure method","level":3,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/rcis.2009.5089286","is_oa":false,"landing_page_url":"https://doi.org/10.1109/rcis.2009.5089286","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2009 Third International Conference on Research Challenges in Information Science","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.5400000214576721}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W1515993250","https://openalex.org/W1601674470","https://openalex.org/W1983791213","https://openalex.org/W2119710029","https://openalex.org/W2135786458","https://openalex.org/W2182482834","https://openalex.org/W4239696231","https://openalex.org/W6631032457"],"related_works":["https://openalex.org/W2100071482","https://openalex.org/W1495666889","https://openalex.org/W1583761149","https://openalex.org/W980488187","https://openalex.org/W4249203355","https://openalex.org/W2184421161","https://openalex.org/W1782470797","https://openalex.org/W2955269344","https://openalex.org/W2161987785","https://openalex.org/W2021161024"],"abstract_inverted_index":{"Most":[0],"of":[1,5,70,73,100],"the":[2,8,32,39,43,76,88,110,123,127,142,152,157],"enormous":[3],"amount":[4,72],"information":[6,158],"from":[7,42,75,126,159],"internet":[9],"is":[10,65,81],"available":[11],"just":[12],"like":[13],"Web":[14,77,116,132,161],"pages":[15,133],"made":[16],"for":[17,27,67,168],"a":[18,59,93,114],"human":[19],"reader.":[20],"They":[21],"don't":[22],"have":[23,55,150],"any":[24],"common":[25],"interface":[26],"accessing,":[28],"searching":[29],"or":[30],"browsing":[31],"data.":[33],"Hence,":[34],"it's":[35],"hard":[36],"to":[37,108,119],"extract":[38],"semantic":[40,143],"data":[41,74],"Web,":[44],"categorize":[45],"them":[46,49],"and":[47,57,130,138,166],"keep":[48],"updated.":[50],"For":[51],"this":[52,120],"purpose":[53],"we":[54,149],"designed":[56,66],"implemented":[58],"system":[60,64,101,122],"called":[61],"AgentMat.":[62],"This":[63],"efficient":[68],"extraction":[69,90],"large":[71],"pages.":[78],"AgentMat":[79],"processing":[80],"based":[82],"on":[83,113],"an":[84],"XML-based":[85],"language":[86],"describing":[87],"given":[89],"task":[91,97],"in":[92],"declarative":[94],"way.":[95],"The":[96],"description":[98],"consists":[99],"components,":[102],"which":[103,155],"connected":[104],"together":[105,140],"are":[106],"able":[107],"perform":[109],"desired":[111],"functionality":[112],"general":[115],"page.":[117],"Thanks":[118],"scraping":[121],"raw":[124],"contents":[125],"irregularly":[128],"updated":[129],"unstructured":[131],"can":[134],"be":[135],"kept":[136],"categorized":[137],"accessed":[139],"with":[141],"metadata.":[144],"In":[145],"our":[146],"pilot":[147],"implementation":[148],"built":[151],"MediaPub":[153],"system,":[154],"extracts":[156],"various":[160],"pages,":[162],"does":[163],"automatic":[164],"categorizing":[165],"checks":[167],"duplicities.":[169]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2013,"cited_by_count":1},{"year":2012,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
