{"id":"https://openalex.org/W2133459307","doi":"https://doi.org/10.1109/wi.2007.107","title":"Detection of Web Subsites: Concepts, Algorithms, and Evaluation Issues","display_name":"Detection of Web Subsites: Concepts, Algorithms, and Evaluation Issues","publication_year":2007,"publication_date":"2007-11-01","ids":{"openalex":"https://openalex.org/W2133459307","doi":"https://doi.org/10.1109/wi.2007.107","mag":"2133459307"},"language":"en","primary_location":{"id":"doi:10.1109/wi.2007.107","is_oa":false,"landing_page_url":"https://doi.org/10.1109/wi.2007.107","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/WIC/ACM International Conference on Web Intelligence (WI'07)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5049590147","display_name":"Eduarda Mendes Rodrigues","orcid":"https://orcid.org/0009-0003-4086-3935"},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Eduarda Mendes Rodrigues","raw_affiliation_strings":["Microsoft Research Limited, Cambridge, UK"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Limited, Cambridge, UK","institution_ids":["https://openalex.org/I4210164937"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113653594","display_name":"Nata\u0161a Mili\u0107-Frayling","orcid":null},"institutions":[{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Natasa Milic-Frayling","raw_affiliation_strings":["Microsoft Research Limited, Cambridge, UK"],"affiliations":[{"raw_affiliation_string":"Microsoft Research Limited, Cambridge, UK","institution_ids":["https://openalex.org/I4210164937"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5019788602","display_name":"Bla\u017e Fortuna","orcid":"https://orcid.org/0000-0002-8585-9388"},"institutions":[{"id":"https://openalex.org/I3006985408","display_name":"Jo\u017eef Stefan Institute","ror":"https://ror.org/05060sz93","country_code":"SI","type":"facility","lineage":["https://openalex.org/I3006985408"]}],"countries":["SI"],"is_corresponding":false,"raw_author_name":"Blaz Fortuna","raw_affiliation_strings":["Department of Knowledge Technologies, Jo\u017eef Stefan Institute, Ljubljana, Slovenia"],"affiliations":[{"raw_affiliation_string":"Department of Knowledge Technologies, Jo\u017eef Stefan Institute, Ljubljana, Slovenia","institution_ids":["https://openalex.org/I3006985408"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5049590147"],"corresponding_institution_ids":["https://openalex.org/I4210164937"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.1873515,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"286","issue":null,"first_page":"66","last_page":"73"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10064","display_name":"Complex Network Analysis Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10064","display_name":"Complex Network Analysis Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13976","display_name":"Web visibility and informetrics","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hyperlink","display_name":"Hyperlink","score":0.8759872913360596},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7831180095672607},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6354796290397644},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.5467296242713928},{"id":"https://openalex.org/keywords/web-site","display_name":"Web site","score":0.533928394317627},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.5111456513404846},{"id":"https://openalex.org/keywords/site-map","display_name":"Site map","score":0.47593358159065247},{"id":"https://openalex.org/keywords/link-analysis","display_name":"Link analysis","score":0.45343413949012756},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4504953622817993},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.4123838543891907},{"id":"https://openalex.org/keywords/web-navigation","display_name":"Web navigation","score":0.39657172560691833},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.38278090953826904},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3810409605503082},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.37032458186149597},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.3455798327922821},{"id":"https://openalex.org/keywords/static-web-page","display_name":"Static web page","score":0.2547371983528137},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.17233163118362427}],"concepts":[{"id":"https://openalex.org/C30088001","wikidata":"https://www.wikidata.org/wiki/Q102014","display_name":"Hyperlink","level":3,"score":0.8759872913360596},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7831180095672607},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6354796290397644},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.5467296242713928},{"id":"https://openalex.org/C2984519610","wikidata":"https://www.wikidata.org/wiki/Q35127","display_name":"Web site","level":3,"score":0.533928394317627},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.5111456513404846},{"id":"https://openalex.org/C67617509","wikidata":"https://www.wikidata.org/wiki/Q1503327","display_name":"Site map","level":5,"score":0.47593358159065247},{"id":"https://openalex.org/C1173588","wikidata":"https://www.wikidata.org/wiki/Q6554294","display_name":"Link analysis","level":2,"score":0.45343413949012756},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4504953622817993},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.4123838543891907},{"id":"https://openalex.org/C61096286","wikidata":"https://www.wikidata.org/wiki/Q7978592","display_name":"Web navigation","level":3,"score":0.39657172560691833},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.38278090953826904},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3810409605503082},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.37032458186149597},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.3455798327922821},{"id":"https://openalex.org/C173576120","wikidata":"https://www.wikidata.org/wiki/Q2641220","display_name":"Static web page","level":4,"score":0.2547371983528137},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.17233163118362427}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/wi.2007.107","is_oa":false,"landing_page_url":"https://doi.org/10.1109/wi.2007.107","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/WIC/ACM International Conference on Web Intelligence (WI'07)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":44,"referenced_works":["https://openalex.org/W65299677","https://openalex.org/W199391036","https://openalex.org/W1505539175","https://openalex.org/W1602667807","https://openalex.org/W1854214752","https://openalex.org/W1971318281","https://openalex.org/W1989338554","https://openalex.org/W2008620264","https://openalex.org/W2013029404","https://openalex.org/W2034607275","https://openalex.org/W2039346806","https://openalex.org/W2040058125","https://openalex.org/W2040075907","https://openalex.org/W2041565863","https://openalex.org/W2049781914","https://openalex.org/W2050028965","https://openalex.org/W2057320839","https://openalex.org/W2070412788","https://openalex.org/W2073722401","https://openalex.org/W2074449313","https://openalex.org/W2089800021","https://openalex.org/W2092906664","https://openalex.org/W2100958137","https://openalex.org/W2101005742","https://openalex.org/W2109760176","https://openalex.org/W2115579680","https://openalex.org/W2117209866","https://openalex.org/W2118382442","https://openalex.org/W2137820941","https://openalex.org/W2138621811","https://openalex.org/W2139653758","https://openalex.org/W2140963174","https://openalex.org/W2148175401","https://openalex.org/W2152133813","https://openalex.org/W2158129944","https://openalex.org/W2158899306","https://openalex.org/W2160799467","https://openalex.org/W2168924255","https://openalex.org/W2175110005","https://openalex.org/W3011290429","https://openalex.org/W6630389553","https://openalex.org/W6639055396","https://openalex.org/W6645485993","https://openalex.org/W6685000865"],"related_works":["https://openalex.org/W2116757369","https://openalex.org/W2916824151","https://openalex.org/W2583864867","https://openalex.org/W4387261808","https://openalex.org/W4254187858","https://openalex.org/W1965490711","https://openalex.org/W2392479856","https://openalex.org/W1488890239","https://openalex.org/W1558012608","https://openalex.org/W4246489105"],"abstract_inverted_index":{"Web":[0,52,83,100,116,129],"sites":[1,117],"are":[2,141],"often":[3],"organized":[4],"into":[5],"several":[6],"regions,":[7],"each":[8],"dedicated":[9],"to":[10,41,50,124],"a":[11,16,20,33,56,62,78,94,99,113,152],"specific":[12],"topic":[13],"or":[14],"serving":[15],"particular":[17],"function.":[18],"From":[19],"user's":[21],"perspective,":[22],"these":[23],"regions":[24],"typically":[25],"form":[26],"coherent":[27],"sets":[28],"of":[29,58,98,115,145,158],"pages":[30],"characterized":[31],"by":[32],"distinct":[34],"navigation":[35],"structure":[36,54,85,97],"and":[37,60,67,102,118,162],"page":[38,109],"layout-we":[39],"refer":[40],"them":[42],"as":[43,55],"subsites.":[44],"In":[45,73],"this":[46],"paper":[47],"we":[48,76,150],"propose":[49],"characterize":[51],"site":[53,84,101,138,147],"collection":[57],"subsites":[59,66],"devise":[61,151],"method":[63,153],"for":[64,70,81,128,154],"detecting":[65],"entry":[68],"points":[69],"subsite":[71,159],"navigation.":[72],"our":[74],"approach":[75,123,136],"use":[77],"new":[79],"model":[80],"representing":[82],"called":[86],"Link":[87],"Structure":[88],"Graph":[89],"(LSG).":[90],"The":[91],"LSG":[92,121,135],"captures":[93],"complete":[95],"hyperlink":[96],"models":[103],"link":[104],"associations":[105],"reflected":[106],"in":[107],"the":[108,120,143,156],"layout.":[110],"We":[111,132],"analyze":[112],"sample":[114],"compare":[119],"based":[122],"commonly":[125],"used":[126],"statistics":[127],"graph":[130],"analysis.":[131],"demonstrate":[133],"that":[134,140],"reveals":[137],"properties":[139],"beyond":[142],"reach":[144],"standard":[146],"models.":[148],"Furthermore,":[149],"evaluating":[155],"performance":[157],"detection":[160],"algorithms":[161],"provide":[163],"evaluation":[164],"guidelines.":[165]},"counts_by_year":[{"year":2017,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
