{"id":"https://openalex.org/W4221167659","doi":"https://doi.org/10.1145/3485447.3512032","title":"WebFormer: The Web-page Transformer for Structure Information Extraction","display_name":"WebFormer: The Web-page Transformer for Structure Information Extraction","publication_year":2022,"publication_date":"2022-04-25","ids":{"openalex":"https://openalex.org/W4221167659","doi":"https://doi.org/10.1145/3485447.3512032"},"language":"en","primary_location":{"id":"doi:10.1145/3485447.3512032","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3485447.3512032","pdf_url":null,"source":{"id":"https://openalex.org/S4363608783","display_name":"Proceedings of the ACM Web Conference 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM Web Conference 2022","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101774657","display_name":"Qifan Wang","orcid":"https://orcid.org/0000-0002-1237-366X"},"institutions":[{"id":"https://openalex.org/I4210114444","display_name":"Meta (United States)","ror":"https://ror.org/01zbnvs85","country_code":"US","type":"company","lineage":["https://openalex.org/I4210114444"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Qifan Wang","raw_affiliation_strings":["Facebook AI, USA"],"affiliations":[{"raw_affiliation_string":"Facebook AI, USA","institution_ids":["https://openalex.org/I4210114444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5105518877","display_name":"Yi Fang","orcid":"https://orcid.org/0009-0000-6102-2201"},"institutions":[{"id":"https://openalex.org/I16269868","display_name":"Santa Clara University","ror":"https://ror.org/03ypqe447","country_code":"US","type":"education","lineage":["https://openalex.org/I16269868"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yi Fang","raw_affiliation_strings":["Santa Clara University, USA"],"affiliations":[{"raw_affiliation_string":"Santa Clara University, USA","institution_ids":["https://openalex.org/I16269868"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009633383","display_name":"Anirudh Ravula","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anirudh Ravula","raw_affiliation_strings":["Google Research, USA"],"affiliations":[{"raw_affiliation_string":"Google Research, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051925942","display_name":"Fuli Feng","orcid":"https://orcid.org/0000-0002-5828-9842"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fuli Feng","raw_affiliation_strings":["University of Science and Technology of China, China"],"affiliations":[{"raw_affiliation_string":"University of Science and Technology of China, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040062188","display_name":"Xiaojun Quan","orcid":"https://orcid.org/0000-0002-8385-1083"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaojun Quan","raw_affiliation_strings":["Sun Yat-sen University, China"],"affiliations":[{"raw_affiliation_string":"Sun Yat-sen University, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101979289","display_name":"Dongfang Liu","orcid":"https://orcid.org/0000-0001-6995-4775"},"institutions":[{"id":"https://openalex.org/I155173764","display_name":"Rochester Institute of Technology","ror":"https://ror.org/00v4yb702","country_code":"US","type":"education","lineage":["https://openalex.org/I155173764"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dongfang Liu","raw_affiliation_strings":["Rochester Institute of Technology, USA"],"affiliations":[{"raw_affiliation_string":"Rochester Institute of Technology, USA","institution_ids":["https://openalex.org/I155173764"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5101774657"],"corresponding_institution_ids":["https://openalex.org/I4210114444"],"apc_list":null,"apc_paid":null,"fwci":8.4632,"has_fulltext":false,"cited_by_count":59,"citation_normalized_percentile":{"value":0.98441967,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"3124","last_page":"3133"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9577999711036682,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.949999988079071,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8760395050048828},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.6916946172714233},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5858290791511536},{"id":"https://openalex.org/keywords/static-web-page","display_name":"Static web page","score":0.51240473985672},{"id":"https://openalex.org/keywords/web-modeling","display_name":"Web modeling","score":0.5030223727226257},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.49386388063430786},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.4575999081134796},{"id":"https://openalex.org/keywords/web-mining","display_name":"Web mining","score":0.45211800932884216},{"id":"https://openalex.org/keywords/web-search-engine","display_name":"Web search engine","score":0.451255202293396},{"id":"https://openalex.org/keywords/web-navigation","display_name":"Web navigation","score":0.3636208772659302}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8760395050048828},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.6916946172714233},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5858290791511536},{"id":"https://openalex.org/C173576120","wikidata":"https://www.wikidata.org/wiki/Q2641220","display_name":"Static web page","level":4,"score":0.51240473985672},{"id":"https://openalex.org/C130436687","wikidata":"https://www.wikidata.org/wiki/Q7978591","display_name":"Web modeling","level":3,"score":0.5030223727226257},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.49386388063430786},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.4575999081134796},{"id":"https://openalex.org/C197046077","wikidata":"https://www.wikidata.org/wiki/Q785337","display_name":"Web mining","level":3,"score":0.45211800932884216},{"id":"https://openalex.org/C521815418","wikidata":"https://www.wikidata.org/wiki/Q4182287","display_name":"Web search engine","level":4,"score":0.451255202293396},{"id":"https://openalex.org/C61096286","wikidata":"https://www.wikidata.org/wiki/Q7978592","display_name":"Web navigation","level":3,"score":0.3636208772659302}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3485447.3512032","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3485447.3512032","pdf_url":null,"source":{"id":"https://openalex.org/S4363608783","display_name":"Proceedings of the ACM Web Conference 2022","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM Web Conference 2022","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.41999998688697815,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":46,"referenced_works":["https://openalex.org/W1795600436","https://openalex.org/W1976022204","https://openalex.org/W2024091454","https://openalex.org/W2061760029","https://openalex.org/W2080132606","https://openalex.org/W2134150392","https://openalex.org/W2146266087","https://openalex.org/W2154444297","https://openalex.org/W2161861392","https://openalex.org/W2400661088","https://openalex.org/W2508333606","https://openalex.org/W2517903345","https://openalex.org/W2598202925","https://openalex.org/W2788885658","https://openalex.org/W2805173585","https://openalex.org/W2944898795","https://openalex.org/W2954936423","https://openalex.org/W2962881743","https://openalex.org/W2962982640","https://openalex.org/W2963748441","https://openalex.org/W2963925437","https://openalex.org/W2964346820","https://openalex.org/W2982150889","https://openalex.org/W2997827534","https://openalex.org/W2998209348","https://openalex.org/W3019932981","https://openalex.org/W3034300118","https://openalex.org/W3034902017","https://openalex.org/W3035131649","https://openalex.org/W3081176230","https://openalex.org/W3092968218","https://openalex.org/W3093838622","https://openalex.org/W3099461227","https://openalex.org/W3102567691","https://openalex.org/W3105188155","https://openalex.org/W3116122129","https://openalex.org/W3121976951","https://openalex.org/W3169766753","https://openalex.org/W3173229273","https://openalex.org/W3173306993","https://openalex.org/W3173793851","https://openalex.org/W3190292546","https://openalex.org/W3190448953","https://openalex.org/W3202839357","https://openalex.org/W4224919569","https://openalex.org/W4250847188"],"related_works":["https://openalex.org/W4224286275","https://openalex.org/W2415191659","https://openalex.org/W2744329849","https://openalex.org/W3139873369","https://openalex.org/W1996869586","https://openalex.org/W2337873446","https://openalex.org/W2145399176","https://openalex.org/W2544264020","https://openalex.org/W2404573736","https://openalex.org/W2021161024"],"abstract_inverted_index":{"Structure":[0],"information":[1,62,110],"extraction":[2,111],"refers":[3],"to":[4,77],"the":[5,90,95,125,151,175,179],"task":[6],"of":[7,80,164,178],"extracting":[8,17,94],"structured":[9],"text":[10,96,147],"fields":[11],"from":[12,21,68,112,130],"web":[13,47,61,70,81,91,113,152],"pages,":[14],"such":[15],"as":[16],"a":[18,22,78,104],"product":[19,26],"offer":[20],"shopping":[23],"page":[24],"including":[25],"title,":[27],"description,":[28],"brand":[29],"and":[30,46,146,168],"price.":[31],"It":[32],"is":[33,72],"an":[34,161],"important":[35],"research":[36],"topic":[37],"which":[38,149],"has":[39,86],"been":[40],"widely":[41],"studied":[42],"in":[43,74,124],"document":[44],"understanding":[45],"search.":[48],"Recent":[49],"natural":[50],"language":[51],"models":[52],"with":[53],"sequence":[54],"modeling":[55,89],"have":[56],"demonstrated":[57],"state-of-the-art":[58,184],"performance":[59,177],"on":[60,88,166],"extraction.":[63],"However,":[64],"effectively":[65],"serializing":[66],"tokens":[67,119,133,145],"unstructured":[69],"pages":[71],"challenging":[73],"practice":[75],"due":[76],"variety":[79],"layout":[82,92,153],"patterns.":[83],"Limited":[84],"work":[85],"focused":[87],"for":[93,108,120,154],"fields.":[97],"In":[98],"this":[99],"paper,":[100],"we":[101,116,138],"introduce":[102],"WebFormer,":[103],"Web-page":[105],"transFormer":[106],"model":[107],"structure":[109],"documents.":[114],"First,":[115],"design":[117],"HTML":[118,126,144],"each":[121],"DOM":[122],"node":[123],"by":[127],"embedding":[128],"representations":[129],"their":[131],"neighboring":[132],"through":[134],"graph":[135],"attention.":[136],"Second,":[137],"construct":[139],"rich":[140],"attention":[141,156],"patterns":[142],"between":[143],"tokens,":[148],"leverages":[150],"effective":[155],"weight":[157],"computation.":[158],"We":[159],"conduct":[160],"extensive":[162],"set":[163],"experiments":[165],"SWDE":[167],"Common":[169],"Crawl":[170],"benchmarks.":[171],"Experimental":[172],"results":[173],"demonstrate":[174],"superior":[176],"proposed":[180],"approach":[181],"over":[182],"several":[183],"methods.":[185]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":12},{"year":2024,"cited_by_count":17},{"year":2023,"cited_by_count":19},{"year":2022,"cited_by_count":10}],"updated_date":"2026-04-13T07:58:08.660418","created_date":"2025-10-10T00:00:00"}
