{"id":"https://openalex.org/W1966598450","doi":"https://doi.org/10.1108/17440080580000088","title":"Discovery of concept entities from web sites using web unit mining","display_name":"Discovery of concept entities from web sites using web unit mining","publication_year":2005,"publication_date":"2005-08-01","ids":{"openalex":"https://openalex.org/W1966598450","doi":"https://doi.org/10.1108/17440080580000088","mag":"1966598450"},"language":"en","primary_location":{"id":"doi:10.1108/17440080580000088","is_oa":false,"landing_page_url":"https://doi.org/10.1108/17440080580000088","pdf_url":null,"source":{"id":"https://openalex.org/S145159096","display_name":"International Journal of Web Information Systems","issn_l":"1744-0084","issn":["1744-0084","1744-0092"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319811","host_organization_name":"Emerald Publishing Limited","host_organization_lineage":["https://openalex.org/P4310319811"],"host_organization_lineage_names":["Emerald Publishing Limited"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Web Information Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://dr.ntu.edu.sg/bitstream/10356/93769/1/2005-wum-jwis.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5097453035","display_name":"Ming Yin Ming","orcid":null},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Ming Yin Ming","raw_affiliation_strings":["Division of Information Studies, School of Communication & Information,Nanyang Technological University, Singapore 639798"],"affiliations":[{"raw_affiliation_string":"Division of Information Studies, School of Communication & Information,Nanyang Technological University, Singapore 639798","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051793131","display_name":"Dion Hoe\u2010Lian Goh","orcid":"https://orcid.org/0000-0003-2904-3269"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Dion Hoe\u2010lian Goh","raw_affiliation_strings":["Division of Information Studies, School of Communication & Information,Nanyang Technological University, Singapore 639798"],"affiliations":[{"raw_affiliation_string":"Division of Information Studies, School of Communication & Information,Nanyang Technological University, Singapore 639798","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039617569","display_name":"Ee\u2010Peng Lim","orcid":"https://orcid.org/0000-0003-0065-8665"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Ee\u2010Peng Lim","raw_affiliation_strings":["Centre for Advanced Information Systems, School of Computer Engineering,Nanyang Technological University, Singapore 639798"],"affiliations":[{"raw_affiliation_string":"Centre for Advanced Information Systems, School of Computer Engineering,Nanyang Technological University, Singapore 639798","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100618738","display_name":"Aixin Sun","orcid":"https://orcid.org/0000-0003-0764-4258"},"institutions":[{"id":"https://openalex.org/I31746571","display_name":"UNSW Sydney","ror":"https://ror.org/03r8z3t63","country_code":"AU","type":"education","lineage":["https://openalex.org/I31746571"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Aixin Sun","raw_affiliation_strings":["School of Computer Science and Engineering, University of New South Wales,Sydney, NSW, Australia 2052"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, University of New South Wales,Sydney, NSW, Australia 2052","institution_ids":["https://openalex.org/I31746571"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5097453035"],"corresponding_institution_ids":["https://openalex.org/I172675005"],"apc_list":null,"apc_paid":null,"fwci":1.7028,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.89123097,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"1","issue":"3","first_page":"123","last_page":"136"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9495000243186951,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8511666059494019},{"id":"https://openalex.org/keywords/web-mining","display_name":"Web mining","score":0.7699368000030518},{"id":"https://openalex.org/keywords/data-web","display_name":"Data Web","score":0.7113143801689148},{"id":"https://openalex.org/keywords/web-modeling","display_name":"Web modeling","score":0.6703609228134155},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.6464414596557617},{"id":"https://openalex.org/keywords/hyperlink","display_name":"Hyperlink","score":0.610548734664917},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.5990254878997803},{"id":"https://openalex.org/keywords/static-web-page","display_name":"Static web page","score":0.5155234336853027},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4886401295661926},{"id":"https://openalex.org/keywords/web-standards","display_name":"Web standards","score":0.4507543742656708},{"id":"https://openalex.org/keywords/web-mapping","display_name":"Web mapping","score":0.4428957998752594},{"id":"https://openalex.org/keywords/web-intelligence","display_name":"Web intelligence","score":0.4356881380081177},{"id":"https://openalex.org/keywords/web-development","display_name":"Web development","score":0.4342014491558075},{"id":"https://openalex.org/keywords/web-navigation","display_name":"Web navigation","score":0.4297703206539154},{"id":"https://openalex.org/keywords/social-semantic-web","display_name":"Social Semantic Web","score":0.42159533500671387},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3540133833885193},{"id":"https://openalex.org/keywords/web-service","display_name":"Web service","score":0.34729039669036865}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8511666059494019},{"id":"https://openalex.org/C197046077","wikidata":"https://www.wikidata.org/wiki/Q785337","display_name":"Web mining","level":3,"score":0.7699368000030518},{"id":"https://openalex.org/C162005631","wikidata":"https://www.wikidata.org/wiki/Q54837","display_name":"Data Web","level":3,"score":0.7113143801689148},{"id":"https://openalex.org/C130436687","wikidata":"https://www.wikidata.org/wiki/Q7978591","display_name":"Web modeling","level":3,"score":0.6703609228134155},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.6464414596557617},{"id":"https://openalex.org/C30088001","wikidata":"https://www.wikidata.org/wiki/Q102014","display_name":"Hyperlink","level":3,"score":0.610548734664917},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.5990254878997803},{"id":"https://openalex.org/C173576120","wikidata":"https://www.wikidata.org/wiki/Q2641220","display_name":"Static web page","level":4,"score":0.5155234336853027},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4886401295661926},{"id":"https://openalex.org/C182321512","wikidata":"https://www.wikidata.org/wiki/Q1153289","display_name":"Web standards","level":3,"score":0.4507543742656708},{"id":"https://openalex.org/C24733836","wikidata":"https://www.wikidata.org/wiki/Q649186","display_name":"Web mapping","level":4,"score":0.4428957998752594},{"id":"https://openalex.org/C544335954","wikidata":"https://www.wikidata.org/wiki/Q2553348","display_name":"Web intelligence","level":4,"score":0.4356881380081177},{"id":"https://openalex.org/C79373723","wikidata":"https://www.wikidata.org/wiki/Q386275","display_name":"Web development","level":3,"score":0.4342014491558075},{"id":"https://openalex.org/C61096286","wikidata":"https://www.wikidata.org/wiki/Q7978592","display_name":"Web navigation","level":3,"score":0.4297703206539154},{"id":"https://openalex.org/C534406577","wikidata":"https://www.wikidata.org/wiki/Q7550843","display_name":"Social Semantic Web","level":3,"score":0.42159533500671387},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3540133833885193},{"id":"https://openalex.org/C35578498","wikidata":"https://www.wikidata.org/wiki/Q193424","display_name":"Web service","level":2,"score":0.34729039669036865}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1108/17440080580000088","is_oa":false,"landing_page_url":"https://doi.org/10.1108/17440080580000088","pdf_url":null,"source":{"id":"https://openalex.org/S145159096","display_name":"International Journal of Web Information Systems","issn_l":"1744-0084","issn":["1744-0084","1744-0092"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319811","host_organization_name":"Emerald Publishing Limited","host_organization_lineage":["https://openalex.org/P4310319811"],"host_organization_lineage_names":["Emerald Publishing Limited"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Web Information Systems","raw_type":"journal-article"},{"id":"pmh:oai:dr.ntu.edu.sg:10356/93769","is_oa":true,"landing_page_url":"http://hdl.handle.net/10220/6193","pdf_url":"https://dr.ntu.edu.sg/bitstream/10356/93769/1/2005-wum-jwis.pdf","source":{"id":"https://openalex.org/S4306402609","display_name":"DR-NTU (Nanyang Technological University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I172675005","host_organization_name":"Nanyang Technological University","host_organization_lineage":["https://openalex.org/I172675005"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Journal Article"},{"id":"pmh:oai:ink.library.smu.edu.sg:sis_research-1092","is_oa":true,"landing_page_url":"https://ink.library.smu.edu.sg/sis_research/93","pdf_url":null,"source":{"id":"https://openalex.org/S4377196871","display_name":"Institutional Knowledge (InK) - Institutional Knowledge at Singapore Management University (Singapore Management University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79891267","host_organization_name":"Singapore Management University","host_organization_lineage":["https://openalex.org/I79891267"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://doi.org/10.1108/17440080580000088","raw_type":"Journal Article"}],"best_oa_location":{"id":"pmh:oai:dr.ntu.edu.sg:10356/93769","is_oa":true,"landing_page_url":"http://hdl.handle.net/10220/6193","pdf_url":"https://dr.ntu.edu.sg/bitstream/10356/93769/1/2005-wum-jwis.pdf","source":{"id":"https://openalex.org/S4306402609","display_name":"DR-NTU (Nanyang Technological University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I172675005","host_organization_name":"Nanyang Technological University","host_organization_lineage":["https://openalex.org/I172675005"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Journal Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W1966598450.pdf","grobid_xml":"https://content.openalex.org/works/W1966598450.grobid-xml"},"referenced_works_count":27,"referenced_works":["https://openalex.org/W1503141391","https://openalex.org/W1574862351","https://openalex.org/W1593624493","https://openalex.org/W1823119923","https://openalex.org/W1989468977","https://openalex.org/W2000900876","https://openalex.org/W2005422315","https://openalex.org/W2013123195","https://openalex.org/W2052142057","https://openalex.org/W2065168033","https://openalex.org/W2067223933","https://openalex.org/W2073576572","https://openalex.org/W2076008912","https://openalex.org/W2082398795","https://openalex.org/W2085470508","https://openalex.org/W2110224739","https://openalex.org/W2153662613","https://openalex.org/W2155800811","https://openalex.org/W2164052363","https://openalex.org/W2166227910","https://openalex.org/W2914756598","https://openalex.org/W4214882679","https://openalex.org/W4237417907","https://openalex.org/W4238941560","https://openalex.org/W4241216069","https://openalex.org/W4244891075","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W2182629206","https://openalex.org/W2330648582","https://openalex.org/W2134078223","https://openalex.org/W2415191659","https://openalex.org/W144381034","https://openalex.org/W4385950365","https://openalex.org/W2555306506","https://openalex.org/W2348335475","https://openalex.org/W2945931694","https://openalex.org/W2520851676"],"abstract_inverted_index":{"A":[0],"web":[1,17,32,39,52,71,84,87,109,130,136],"site":[2,33],"usually":[3],"contains":[4],"a":[5,56,106],"large":[6],"number":[7],"of":[8,13,68],"concept":[9,27,57,61,91],"entities,":[10],"each":[11],"consisting":[12],"one":[14,83],"or":[15],"more":[16,30,81,107],"pages":[18,53],"connected":[19],"by":[20],"hyperlinks.":[21],"In":[22],"order":[23],"to":[24,50,98,125],"discover":[25,126],"these":[26],"entities":[28,62],"for":[29],"expressive":[31],"queries":[34],"and":[35,59,127,141],"other":[36],"applications,":[37],"the":[38,66,149],"unit":[40,47,72,85],"mining":[41,48,73],"problem":[42],"has":[43],"been":[44,144],"proposed.":[45],"Web":[46],"aims":[49],"determine":[51],"that":[54,134],"constitute":[55],"entity":[58],"classify":[60],"into":[63],"categories.":[64],"Nevertheless,":[65],"performance":[67],"&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\nan":[69],"existing":[70],"algorithm,":[74],"iWUM,":[75],"suffers":[76],"as":[77,114],"it":[78],"may":[79],"&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\ncreate":[80],"than":[82],"(incomplete":[86],"units)":[88],"from":[89],"a&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\nsingle":[90],"entity.":[92],"This":[93],"paper":[94],"presents":[95],"two":[96],"methods":[97],"solve":[99],"this":[100],"problem.":[101],"The":[102,119],"first":[103],"method":[104,112,121],"introduces":[105],"effective":[108],"&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\nfragment":[110],"construction":[111],"so":[113],"reduce":[115],"later":[116],"classification&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n":[117],"errors.":[118],"second":[120],"incorporates":[122],"site-specific":[123],"knowledge":[124],"handle":[128],"incomplete":[129,135],"units.":[131],"Experiments":[132],"show":[133],"units":[137],"can":[138],"be":[139],"removed":[140],"overall":[142],"accuracy&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\nhas":[143],"significantly":[145],"improved,":[146],"especially":[147],"on":[148],"precision":[150],"and&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n&#13;\\n":[151],"F1":[152],"measures.":[153]},"counts_by_year":[{"year":2015,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2016-06-24T00:00:00"}
