{"id":"https://openalex.org/W2612236765","doi":"https://doi.org/10.17706/jsw.12.3.180-188","title":"The Automatic Extraction of Web Information Based on Regular Expression","display_name":"The Automatic Extraction of Web Information Based on Regular Expression","publication_year":2017,"publication_date":"2017-04-01","ids":{"openalex":"https://openalex.org/W2612236765","doi":"https://doi.org/10.17706/jsw.12.3.180-188","mag":"2612236765"},"language":"en","primary_location":{"id":"doi:10.17706/jsw.12.3.180-188","is_oa":true,"landing_page_url":"https://doi.org/10.17706/jsw.12.3.180-188","pdf_url":"http://www.jsoftware.us/vol12/241-JSW15206.pdf","source":{"id":"https://openalex.org/S114141714","display_name":"Journal of Software","issn_l":"1796-217X","issn":["1796-217X"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318660","host_organization_name":"Academy Publisher","host_organization_lineage":["https://openalex.org/P4310318660"],"host_organization_lineage_names":["Academy Publisher"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Software","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"http://www.jsoftware.us/vol12/241-JSW15206.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100677317","display_name":"Li Ji","orcid":"https://orcid.org/0000-0003-4397-5273"},"institutions":[{"id":"https://openalex.org/I1284762954","display_name":"Zhejiang A & F University","ror":"https://ror.org/02vj4rn06","country_code":"CN","type":"education","lineage":["https://openalex.org/I1284762954"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Li Ji","raw_affiliation_strings":["Zhejiang Provincial Key Laboratory of Forestry Intelligent Monitoring and Information Technology, Zhejiang A&F University, Lin'an 311300,China","School of Information Engineering, Zhejiang A&F University, Lin'an 311300,China"],"affiliations":[{"raw_affiliation_string":"Zhejiang Provincial Key Laboratory of Forestry Intelligent Monitoring and Information Technology, Zhejiang A&F University, Lin'an 311300,China","institution_ids":["https://openalex.org/I1284762954"]},{"raw_affiliation_string":"School of Information Engineering, Zhejiang A&F University, Lin'an 311300,China","institution_ids":["https://openalex.org/I1284762954"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034251097","display_name":"Guangyu Jiang","orcid":"https://orcid.org/0000-0003-1141-1019"},"institutions":[{"id":"https://openalex.org/I1284762954","display_name":"Zhejiang A & F University","ror":"https://ror.org/02vj4rn06","country_code":"CN","type":"education","lineage":["https://openalex.org/I1284762954"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiang Guangyu","raw_affiliation_strings":["School of Information Engineering, Zhejiang A&F University, Lin'an 311300,China","Zhejiang Provincial Key Laboratory of Forestry Intelligent Monitoring and Information Technology, Zhejiang A&F University, Lin'an 311300,China"],"affiliations":[{"raw_affiliation_string":"School of Information Engineering, Zhejiang A&F University, Lin'an 311300,China","institution_ids":["https://openalex.org/I1284762954"]},{"raw_affiliation_string":"Zhejiang Provincial Key Laboratory of Forestry Intelligent Monitoring and Information Technology, Zhejiang A&F University, Lin'an 311300,China","institution_ids":["https://openalex.org/I1284762954"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019056989","display_name":"Aijun Xu","orcid":"https://orcid.org/0000-0001-6789-6938"},"institutions":[{"id":"https://openalex.org/I1284762954","display_name":"Zhejiang A & F University","ror":"https://ror.org/02vj4rn06","country_code":"CN","type":"education","lineage":["https://openalex.org/I1284762954"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xu Aijun","raw_affiliation_strings":["School of Information Engineering, Zhejiang A&F University, Lin'an 311300,China","Zhejiang Provincial Key Laboratory of Forestry Intelligent Monitoring and Information Technology, Zhejiang A&F University, Lin'an 311300,China"],"affiliations":[{"raw_affiliation_string":"School of Information Engineering, Zhejiang A&F University, Lin'an 311300,China","institution_ids":["https://openalex.org/I1284762954"]},{"raw_affiliation_string":"Zhejiang Provincial Key Laboratory of Forestry Intelligent Monitoring and Information Technology, Zhejiang A&F University, Lin'an 311300,China","institution_ids":["https://openalex.org/I1284762954"]}]},{"author_position":"last","author":{"id":null,"display_name":"Wang Yunzhen","orcid":null},"institutions":[{"id":"https://openalex.org/I898388014","display_name":"Woodside (Australia)","ror":"https://ror.org/03vy1bn66","country_code":"AU","type":"company","lineage":["https://openalex.org/I898388014"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Wang Yunzhen","raw_affiliation_strings":["Jiande Xin'anjiang Woodland , Jiande 311600"],"affiliations":[{"raw_affiliation_string":"Jiande Xin'anjiang Woodland , Jiande 311600","institution_ids":["https://openalex.org/I898388014"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100677317"],"corresponding_institution_ids":["https://openalex.org/I1284762954"],"apc_list":null,"apc_paid":null,"fwci":2.0234,"has_fulltext":true,"cited_by_count":6,"citation_normalized_percentile":{"value":0.89594182,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":"12","issue":"4","first_page":"180","last_page":"188"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9908999800682068,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9908999800682068,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9354513883590698},{"id":"https://openalex.org/keywords/regular-expression","display_name":"Regular expression","score":0.603364109992981},{"id":"https://openalex.org/keywords/expression","display_name":"Expression (computer science)","score":0.5486019253730774},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.5008368492126465},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4618012607097626},{"id":"https://openalex.org/keywords/extraction","display_name":"Extraction (chemistry)","score":0.45203036069869995},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.4081032872200012},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.33449453115463257},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.12678369879722595},{"id":"https://openalex.org/keywords/chromatography","display_name":"Chromatography","score":0.03763309121131897}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9354513883590698},{"id":"https://openalex.org/C121329065","wikidata":"https://www.wikidata.org/wiki/Q185612","display_name":"Regular expression","level":2,"score":0.603364109992981},{"id":"https://openalex.org/C90559484","wikidata":"https://www.wikidata.org/wiki/Q778379","display_name":"Expression (computer science)","level":2,"score":0.5486019253730774},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.5008368492126465},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4618012607097626},{"id":"https://openalex.org/C4725764","wikidata":"https://www.wikidata.org/wiki/Q844704","display_name":"Extraction (chemistry)","level":2,"score":0.45203036069869995},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.4081032872200012},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33449453115463257},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.12678369879722595},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.03763309121131897},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.17706/jsw.12.3.180-188","is_oa":true,"landing_page_url":"https://doi.org/10.17706/jsw.12.3.180-188","pdf_url":"http://www.jsoftware.us/vol12/241-JSW15206.pdf","source":{"id":"https://openalex.org/S114141714","display_name":"Journal of Software","issn_l":"1796-217X","issn":["1796-217X"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318660","host_organization_name":"Academy Publisher","host_organization_lineage":["https://openalex.org/P4310318660"],"host_organization_lineage_names":["Academy Publisher"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Software","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.17706/jsw.12.3.180-188","is_oa":true,"landing_page_url":"https://doi.org/10.17706/jsw.12.3.180-188","pdf_url":"http://www.jsoftware.us/vol12/241-JSW15206.pdf","source":{"id":"https://openalex.org/S114141714","display_name":"Journal of Software","issn_l":"1796-217X","issn":["1796-217X"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318660","host_organization_name":"Academy Publisher","host_organization_lineage":["https://openalex.org/P4310318660"],"host_organization_lineage_names":["Academy Publisher"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Software","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2612236765.pdf","grobid_xml":"https://content.openalex.org/works/W2612236765.grobid-xml"},"referenced_works_count":11,"referenced_works":["https://openalex.org/W2113730959","https://openalex.org/W2314768640","https://openalex.org/W2334116122","https://openalex.org/W2349213514","https://openalex.org/W2359514473","https://openalex.org/W2360179890","https://openalex.org/W2362238332","https://openalex.org/W2367919820","https://openalex.org/W2368308506","https://openalex.org/W2375849735","https://openalex.org/W2378560371"],"related_works":["https://openalex.org/W2563334590","https://openalex.org/W3094387502","https://openalex.org/W2922478741","https://openalex.org/W2101092231","https://openalex.org/W2119827098","https://openalex.org/W2600646079","https://openalex.org/W2396272923","https://openalex.org/W1549877447","https://openalex.org/W2367919820","https://openalex.org/W1901649692"],"abstract_inverted_index":{"Based":[0],"on":[1,71],"search":[2,40,72],"engine":[3,73],",":[4],"this":[5,52],"paper":[6],"built":[7],"a":[8],"Web":[9,98],"information":[10,99],"retrieval":[11,66],"matching":[12,89],"and":[13,23,43,67,87,101,104],"structure":[14,48,100],"extraction":[15,69,83,103],"model.":[16],"And":[17],"realized":[18],"the":[19,31,39,45,55,61,81,88],"algorithm":[20],"of":[21,35,49,63],"locating":[22],"automatically":[24],"extracting":[25],"multi-web":[26],"Baidu":[27],"news":[28],"information.":[29],"Getting":[30],"standard":[32],"mathematical":[33],"expression":[34],"URLs":[36,42],"by":[37],"analyzing":[38,44],"results":[41,78],"DOM":[46],"tree":[47],"web":[50],"pages,":[51],"article":[53],"designed":[54],"key":[56],"tags":[57],"regular":[58],"expression.":[59],"Finally,":[60],"method":[62],"multi-page":[64],"location":[65],"structured":[68],"based":[70],"is":[74,85,91],"realized.":[75],"The":[76],"experimental":[77],"showed":[79],"that":[80],"average":[82],"result":[84],"99.60%,":[86],"ratio":[90],"99.56%.":[92],"It":[93],"can":[94],"be":[95],"used":[96],"for":[97],"automatic":[102],"local":[105],"preservation.":[106]},"counts_by_year":[{"year":2023,"cited_by_count":2},{"year":2020,"cited_by_count":2},{"year":2018,"cited_by_count":2}],"updated_date":"2026-04-16T08:26:57.006410","created_date":"2017-05-19T00:00:00"}
