{"id":"https://openalex.org/W2163949702","doi":"https://doi.org/10.1109/cscwd.2010.5471969","title":"Web information extraction based on hidden Markov model","display_name":"Web information extraction based on hidden Markov model","publication_year":2010,"publication_date":"2010-04-01","ids":{"openalex":"https://openalex.org/W2163949702","doi":"https://doi.org/10.1109/cscwd.2010.5471969","mag":"2163949702"},"language":"en","primary_location":{"id":"doi:10.1109/cscwd.2010.5471969","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cscwd.2010.5471969","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The 2010 14th International Conference on Computer Supported Cooperative Work in Design","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5086230381","display_name":"Jianbing Lai","orcid":null},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jianbing Lai","raw_affiliation_strings":["School of Software, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Software, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100409494","display_name":"Qiang Liu","orcid":"https://orcid.org/0000-0002-8402-029X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qiang Liu","raw_affiliation_strings":["School of Software, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Software, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100330618","display_name":"Yi Liu","orcid":"https://orcid.org/0000-0003-1399-7420"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yi Liu","raw_affiliation_strings":["School of Software, Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Software, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5086230381"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":4.1622,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.94952451,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"234","last_page":"238"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13734","display_name":"Advanced Computational Techniques and Applications","score":0.9822999835014343,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.958299994468689,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.769329309463501},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.6907286047935486},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.5692870616912842},{"id":"https://openalex.org/keywords/maximum-entropy-markov-model","display_name":"Maximum-entropy Markov model","score":0.5254380106925964},{"id":"https://openalex.org/keywords/markov-model","display_name":"Markov model","score":0.47052064538002014},{"id":"https://openalex.org/keywords/web-application","display_name":"Web application","score":0.4567585587501526},{"id":"https://openalex.org/keywords/extraction","display_name":"Extraction (chemistry)","score":0.42381662130355835},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.36401641368865967},{"id":"https://openalex.org/keywords/markov-chain","display_name":"Markov chain","score":0.3373926281929016},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3241509795188904},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.3173717260360718},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.23791125416755676},{"id":"https://openalex.org/keywords/variable-order-markov-model","display_name":"Variable-order Markov model","score":0.21268120408058167}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.769329309463501},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.6907286047935486},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.5692870616912842},{"id":"https://openalex.org/C196956702","wikidata":"https://www.wikidata.org/wiki/Q6795829","display_name":"Maximum-entropy Markov model","level":5,"score":0.5254380106925964},{"id":"https://openalex.org/C163836022","wikidata":"https://www.wikidata.org/wiki/Q6771326","display_name":"Markov model","level":3,"score":0.47052064538002014},{"id":"https://openalex.org/C118643609","wikidata":"https://www.wikidata.org/wiki/Q189210","display_name":"Web application","level":2,"score":0.4567585587501526},{"id":"https://openalex.org/C4725764","wikidata":"https://www.wikidata.org/wiki/Q844704","display_name":"Extraction (chemistry)","level":2,"score":0.42381662130355835},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36401641368865967},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.3373926281929016},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3241509795188904},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3173717260360718},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.23791125416755676},{"id":"https://openalex.org/C54907487","wikidata":"https://www.wikidata.org/wiki/Q7915688","display_name":"Variable-order Markov model","level":4,"score":0.21268120408058167},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cscwd.2010.5471969","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cscwd.2010.5471969","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The 2010 14th International Conference on Computer Supported Cooperative Work in Design","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.5099999904632568,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W1568339100","https://openalex.org/W1934019294","https://openalex.org/W1999595522","https://openalex.org/W2105594594","https://openalex.org/W2125838338","https://openalex.org/W2137661905","https://openalex.org/W2142384583","https://openalex.org/W2145948275","https://openalex.org/W6634115791","https://openalex.org/W6640267182","https://openalex.org/W6681762687"],"related_works":["https://openalex.org/W2134386692","https://openalex.org/W2379938888","https://openalex.org/W1510894296","https://openalex.org/W2116722627","https://openalex.org/W1977445474","https://openalex.org/W4233405330","https://openalex.org/W2032015827","https://openalex.org/W2194396582","https://openalex.org/W2082284720","https://openalex.org/W2566202039"],"abstract_inverted_index":{"This":[0],"paper":[1],"proposes":[2],"a":[3],"semantic-block-based":[4],"hidden":[5],"Markov":[6],"model.":[7],"Semantic":[8],"block":[9,29],"is":[10],"segmented":[11],"from":[12],"the":[13,31,39,48,53,59,64,73,81,83,91],"elicited":[14],"information":[15,105],"of":[16,23,52,67],"various":[17],"websites":[18],"based":[19],"on":[20],"their":[21],"characteristic":[22],"semi-structure.":[24],"The":[25],"model":[26,93],"adopts":[27],"semantic":[28],"as":[30],"basic":[32],"element":[33,41],"in":[34,44,99],"an":[35],"observation":[36,60],"sequence,":[37],"replacing":[38],"original":[40],"\u2014":[42],"word,":[43],"order":[45],"to":[46,88],"improve":[47],"accuracy":[49,66],"and":[50,63,76,94,101],"efficiency":[51],"transition":[54,69],"matrix.":[55],"Also,":[56],"it":[57],"optimizes":[58],"probability":[61],"distribution":[62],"estimation":[65],"state":[68],"sequence":[70],"by":[71],"adopting":[72],"\u201cvoting":[74],"strategy\u201d":[75],"modifying":[77],"Viterbi":[78],"algorithm.":[79],"In":[80],"end,":[82],"experiment":[84],"results":[85],"are":[86],"able":[87],"show":[89],"that":[90],"new":[92],"algorithms":[95],"give":[96],"satisfying":[97],"performance":[98],"recall":[100],"precision":[102],"for":[103],"web":[104],"extraction.":[106]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":2},{"year":2013,"cited_by_count":2},{"year":2012,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
