{"id":"https://openalex.org/W2444513755","doi":"https://doi.org/10.1109/icde.2016.7498308","title":"Learning to query: Focused web page harvesting for entity aspects","display_name":"Learning to query: Focused web page harvesting for entity aspects","publication_year":2016,"publication_date":"2016-05-01","ids":{"openalex":"https://openalex.org/W2444513755","doi":"https://doi.org/10.1109/icde.2016.7498308","mag":"2444513755"},"language":"en","primary_location":{"id":"doi:10.1109/icde.2016.7498308","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icde.2016.7498308","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE 32nd International Conference on Data Engineering (ICDE)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://ink.library.smu.edu.sg/sis_research/4066","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5055103025","display_name":"Yuan Fang","orcid":"https://orcid.org/0000-0002-4265-5289"},"institutions":[{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Yuan Fang","raw_affiliation_strings":["Institute for Infocomm Research, Singapore"],"affiliations":[{"raw_affiliation_string":"Institute for Infocomm Research, Singapore","institution_ids":["https://openalex.org/I3005327000"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065995973","display_name":"Vincent W. Zheng","orcid":"https://orcid.org/0000-0002-0904-3184"},"institutions":[{"id":"https://openalex.org/I4210108443","display_name":"Advanced Digital Sciences Center","ror":"https://ror.org/01xaqx887","country_code":"SG","type":"facility","lineage":["https://openalex.org/I4210108443"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Vincent W. Zheng","raw_affiliation_strings":["Advanced Digital Sciences Center, Singapore"],"affiliations":[{"raw_affiliation_string":"Advanced Digital Sciences Center, Singapore","institution_ids":["https://openalex.org/I4210108443"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101880377","display_name":"Kevin Chen\u2013Chuan Chang","orcid":"https://orcid.org/0000-0003-0997-6803"},"institutions":[{"id":"https://openalex.org/I4210108443","display_name":"Advanced Digital Sciences Center","ror":"https://ror.org/01xaqx887","country_code":"SG","type":"facility","lineage":["https://openalex.org/I4210108443"]},{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["SG","US"],"is_corresponding":false,"raw_author_name":"Kevin Chen-Chuan Chang","raw_affiliation_strings":["Advanced Digital Sciences Center, Singapore","University of Illinois, Urbana-Champaign, USA"],"affiliations":[{"raw_affiliation_string":"Advanced Digital Sciences Center, Singapore","institution_ids":["https://openalex.org/I4210108443"]},{"raw_affiliation_string":"University of Illinois, Urbana-Champaign, USA","institution_ids":["https://openalex.org/I157725225"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5055103025"],"corresponding_institution_ids":["https://openalex.org/I3005327000"],"apc_list":null,"apc_paid":null,"fwci":1.3857,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.86363711,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"2","issue":null,"first_page":"1002","last_page":"1013"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9926000237464905,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9900000095367432,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.788062334060669},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6004530787467957},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.533391535282135},{"id":"https://openalex.org/keywords/web-search-query","display_name":"Web search query","score":0.5126025080680847},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.45055344700813293},{"id":"https://openalex.org/keywords/search-engine","display_name":"Search engine","score":0.309749960899353}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.788062334060669},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6004530787467957},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.533391535282135},{"id":"https://openalex.org/C164120249","wikidata":"https://www.wikidata.org/wiki/Q995982","display_name":"Web search query","level":3,"score":0.5126025080680847},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.45055344700813293},{"id":"https://openalex.org/C97854310","wikidata":"https://www.wikidata.org/wiki/Q19541","display_name":"Search engine","level":2,"score":0.309749960899353}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icde.2016.7498308","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icde.2016.7498308","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 IEEE 32nd International Conference on Data Engineering (ICDE)","raw_type":"proceedings-article"},{"id":"pmh:oai:ink.library.smu.edu.sg:sis_research-5069","is_oa":true,"landing_page_url":"https://ink.library.smu.edu.sg/sis_research/4066","pdf_url":null,"source":{"id":"https://openalex.org/S4306401925","display_name":"Singapore Management University Institutional Knowledge (InK) (Singapore Management University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79891267","host_organization_name":"Singapore Management University","host_organization_lineage":["https://openalex.org/I79891267"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"https://doi.org/10.1109/ICDE.2016.7498308","raw_type":"Conference Proceeding Article"}],"best_oa_location":{"id":"pmh:oai:ink.library.smu.edu.sg:sis_research-5069","is_oa":true,"landing_page_url":"https://ink.library.smu.edu.sg/sis_research/4066","pdf_url":null,"source":{"id":"https://openalex.org/S4306401925","display_name":"Singapore Management University Institutional Knowledge (InK) (Singapore Management University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79891267","host_organization_name":"Singapore Management University","host_organization_lineage":["https://openalex.org/I79891267"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"https://doi.org/10.1109/ICDE.2016.7498308","raw_type":"Conference Proceeding Article"},"sustainable_development_goals":[{"score":0.4099999964237213,"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W11298561","https://openalex.org/W1489949474","https://openalex.org/W1964348731","https://openalex.org/W2014016001","https://openalex.org/W2019701044","https://openalex.org/W2026738364","https://openalex.org/W2029341294","https://openalex.org/W2063943022","https://openalex.org/W2081948558","https://openalex.org/W2084480549","https://openalex.org/W2093245971","https://openalex.org/W2096891167","https://openalex.org/W2102563107","https://openalex.org/W2103931177","https://openalex.org/W2104049510","https://openalex.org/W2105540829","https://openalex.org/W2115457429","https://openalex.org/W2115461474","https://openalex.org/W2118045473","https://openalex.org/W2120642948","https://openalex.org/W2122922389","https://openalex.org/W2123442489","https://openalex.org/W2124673015","https://openalex.org/W2128384372","https://openalex.org/W2133299088","https://openalex.org/W2136542423","https://openalex.org/W2152766222","https://openalex.org/W2156940638","https://openalex.org/W4240913316","https://openalex.org/W4285719527","https://openalex.org/W6600479677","https://openalex.org/W6671596635","https://openalex.org/W6677778574","https://openalex.org/W6678701036","https://openalex.org/W6682602269"],"related_works":["https://openalex.org/W1978230837","https://openalex.org/W1488511360","https://openalex.org/W167737004","https://openalex.org/W1781894645","https://openalex.org/W1964038241","https://openalex.org/W2894902932","https://openalex.org/W2185038817","https://openalex.org/W2004064649","https://openalex.org/W2385957133","https://openalex.org/W569715723"],"abstract_inverted_index":{"As":[0],"the":[1,21,50,58,62,100,103,115,152],"Web":[2,27],"hosts":[3],"rich":[4],"information":[5,10,56],"about":[6],"real-world":[7],"entities,":[8],"our":[9,65,174],"quests":[11],"become":[12,142,167],"increasingly":[13],"entity":[14,30,90,108,128],"centric.":[15],"In":[16,110],"this":[17],"paper,":[18],"we":[19,60,79,117],"study":[20],"problem":[22,66],"of":[23,26,64,92,102],"focused":[24,87],"harvesting":[25],"pages":[28],"for":[29,162],"aspects,":[31],"to":[32,54,69,73,98,112,141,166],"support":[33],"downstream":[34],"applications":[35],"such":[36],"as":[37,67],"business":[38],"analytics":[39],"and":[40,121,180,185],"building":[41],"a":[42,84,126,130,146],"vertical":[43],"portal.":[44],"Given":[45],"that":[46,78,155,173],"search":[47,85],"engines":[48],"are":[49],"de":[51],"facto":[52],"gateways":[53],"assess":[55],"on":[57,88],"Web,":[59],"recognize":[61],"essence":[63],"Learning":[68],"Query":[70],"(L2Q)":[71],"-":[72],"intelligently":[74],"select":[75],"queries":[76,105,154,165],"so":[77],"can":[80],"harvest":[81],"pages,":[82],"via":[83],"engine,":[86],"an":[89],"aspect":[91],"interest.":[93],"Thus,":[94],"it":[95],"is":[96],"crucial":[97],"quantify":[99],"utilities":[101],"candidate":[104,147],"w.r.t.":[106],"some":[107],"aspect.":[109],"order":[111],"better":[113],"estimate":[114],"utilities,":[116],"identify":[118],"two":[119],"opportunities":[120],"address":[122],"their":[123],"challenges.":[124],"First,":[125],"target":[127],"in":[129,187],"given":[131],"domain":[132,143],"has":[133],"many":[134],"peers.":[135],"We":[136,160],"leverage":[137],"these":[138,163],"peer":[139],"entities":[140],"aware.":[144,169],"Second,":[145],"query":[148],"may":[149],"\u201coverlap\u201d":[150],"with":[151],"past":[153,164],"have":[156],"already":[157],"been":[158],"fired.":[159],"account":[161],"context":[168],"Empirical":[170],"results":[171],"show":[172],"approach":[175],"significantly":[176],"outperforms":[177],"both":[178],"algorithmic":[179],"manual":[181],"baselines":[182],"by":[183],"16%":[184],"10%":[186],"F-scores,":[188],"respectively.":[189]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2016,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
