{"id":"https://openalex.org/W3081176230","doi":"https://doi.org/10.1145/3394486.3403153","title":"FreeDOM: A Transferable Neural Architecture for Structured Information Extraction on Web Documents","display_name":"FreeDOM: A Transferable Neural Architecture for Structured Information Extraction on Web Documents","publication_year":2020,"publication_date":"2020-08-20","ids":{"openalex":"https://openalex.org/W3081176230","doi":"https://doi.org/10.1145/3394486.3403153","mag":"3081176230"},"language":"en","primary_location":{"id":"doi:10.1145/3394486.3403153","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3394486.3403153","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3394486.3403153","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery &amp; Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3394486.3403153","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Bill Yuchen Lin","orcid":null},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Bill Yuchen Lin","raw_affiliation_strings":["University of Southern California, Los Angeles, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Ying Sheng","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ying Sheng","raw_affiliation_strings":["Google, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Google, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Nguyen Vo","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nguyen Vo","raw_affiliation_strings":["Google, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Google, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":null,"display_name":"Sandeep Tata","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sandeep Tata","raw_affiliation_strings":["Google, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Google, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I1174212"],"apc_list":null,"apc_paid":null,"fwci":5.0065,"has_fulltext":true,"cited_by_count":31,"citation_normalized_percentile":{"value":0.9575647,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1092","last_page":"1102"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9926000237464905,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9889000058174133,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.7253999710083008},{"id":"https://openalex.org/keywords/markup-language","display_name":"Markup language","score":0.6378999948501587},{"id":"https://openalex.org/keywords/html","display_name":"HTML","score":0.5361999869346619},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5252000093460083},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.49160000681877136},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.4578000009059906},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4553999900817871},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.40959998965263367},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.3783000111579895}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.823199987411499},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.7253999710083008},{"id":"https://openalex.org/C45874996","wikidata":"https://www.wikidata.org/wiki/Q37045","display_name":"Markup language","level":3,"score":0.6378999948501587},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6089000105857849},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5483999848365784},{"id":"https://openalex.org/C138708601","wikidata":"https://www.wikidata.org/wiki/Q8811","display_name":"HTML","level":3,"score":0.5361999869346619},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5252000093460083},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.49160000681877136},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.4578000009059906},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4553999900817871},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.40959998965263367},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.400299996137619},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.3783000111579895},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.3424000144004822},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3359000086784363},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.329800009727478},{"id":"https://openalex.org/C62611344","wikidata":"https://www.wikidata.org/wiki/Q1062658","display_name":"Node (physics)","level":2,"score":0.32919999957084656},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.314300000667572},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.31119999289512634},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.3102000057697296},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.29980000853538513},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.2946000099182129},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.2937000095844269},{"id":"https://openalex.org/C17305859","wikidata":"https://www.wikidata.org/wiki/Q382944","display_name":"Soar","level":2,"score":0.28999999165534973},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.2892000079154968},{"id":"https://openalex.org/C2129575","wikidata":"https://www.wikidata.org/wiki/Q54837","display_name":"Semantic Web","level":2,"score":0.28279998898506165},{"id":"https://openalex.org/C5655090","wikidata":"https://www.wikidata.org/wiki/Q192588","display_name":"Relational database","level":2,"score":0.2800999879837036},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.27480000257492065},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.2619999945163727},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.2533000111579895},{"id":"https://openalex.org/C142575187","wikidata":"https://www.wikidata.org/wiki/Q3358290","display_name":"Pyramid (geometry)","level":2,"score":0.25200000405311584},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.2515999972820282}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3394486.3403153","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3394486.3403153","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3394486.3403153","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery &amp; Data Mining","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2010.10755","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2010.10755","pdf_url":"https://arxiv.org/pdf/2010.10755","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"doi:10.1145/3394486.3403153","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3394486.3403153","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3394486.3403153","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery &amp; Data Mining","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3081176230.pdf","grobid_xml":"https://content.openalex.org/works/W3081176230.grobid-xml"},"referenced_works_count":44,"referenced_works":["https://openalex.org/W1772106736","https://openalex.org/W1795600436","https://openalex.org/W1971415141","https://openalex.org/W2016753842","https://openalex.org/W2024091454","https://openalex.org/W2038172683","https://openalex.org/W2088600132","https://openalex.org/W2104086170","https://openalex.org/W2134150392","https://openalex.org/W2147880316","https://openalex.org/W2148317291","https://openalex.org/W2161861392","https://openalex.org/W2163072729","https://openalex.org/W2250521169","https://openalex.org/W2250539671","https://openalex.org/W2296283641","https://openalex.org/W2356873185","https://openalex.org/W2470673105","https://openalex.org/W2564425030","https://openalex.org/W2573020254","https://openalex.org/W2584356431","https://openalex.org/W2604259521","https://openalex.org/W2606507016","https://openalex.org/W2624614404","https://openalex.org/W2724395316","https://openalex.org/W2757931374","https://openalex.org/W2890494294","https://openalex.org/W2890989031","https://openalex.org/W2896457183","https://openalex.org/W2912351665","https://openalex.org/W2912664727","https://openalex.org/W2914479823","https://openalex.org/W2949681443","https://openalex.org/W2949861626","https://openalex.org/W2962982640","https://openalex.org/W2963958374","https://openalex.org/W2964120615","https://openalex.org/W2983995706","https://openalex.org/W3012687255","https://openalex.org/W3035131649","https://openalex.org/W3139115194","https://openalex.org/W6633154970","https://openalex.org/W6633979047","https://openalex.org/W6681480118"],"related_works":[],"abstract_inverted_index":{"Extracting":[0],"structured":[1],"data":[2],"from":[3,134],"HTML":[4],"documents":[5],"is":[6,118],"a":[7,11,39,65,81,109,128,151],"long-studied":[8],"problem":[9],"with":[10,154],"broad":[12],"range":[13,103],"of":[14,42,58,131,145,166],"applications":[15],"like":[16,30],"augmenting":[17],"knowledge":[18],"bases,":[19],"supporting":[20],"faceted":[21],"search,":[22],"and":[23,32,95,105],"providing":[24],"domain-specific":[25],"experiences":[26],"for":[27,44,83],"key":[28],"verticals":[29],"shopping":[31],"movies.":[33],"Previous":[34],"approaches":[35],"have":[36],"either":[37],"required":[38],"small":[40,129],"number":[41,130],"examples":[43],"each":[45,84],"target":[46],"site":[47],"or":[48,182],"relied":[49],"on":[50,127,150,174],"carefully":[51],"handcrafted":[52],"heuristics":[53],"built":[54],"over":[55,142,179],"visual":[56,143],"renderings":[57,144],"websites.":[59],"In":[60],"this":[61],"paper,":[62],"we":[63,158],"present":[64],"novel":[66],"two-stage":[67],"neural":[68,111],"approach,":[69],"named":[70],"FreeDOM,":[71],"which":[72],"overcomes":[73],"both":[74,92],"these":[75,115],"limitations.":[76],"The":[77,98],"first":[78],"stage":[79,100],"learns":[80],"representation":[82],"DOM":[85],"node":[86],"in":[87],"the":[88,93,146,163,167],"page":[89],"by":[90,169],"combining":[91,114],"text":[94],"markup":[96],"information.":[97],"second":[99],"captures":[101],"longer":[102],"distance":[104],"semantic":[106],"relatedness":[107],"using":[108],"relational":[110],"network.":[112],"By":[113],"stages,":[116],"FreeDOM":[117,161],"able":[119],"to":[120,122],"generalize":[121],"unseen":[123],"sites":[124,133],"after":[125],"training":[126],"seed":[132],"that":[135,160],"vertical":[136],"without":[137,176],"requiring":[138,177],"expensive":[139,183],"hand-crafted":[140,184],"features":[141,178],"page.":[147],"Through":[148],"experiments":[149],"public":[152],"dataset":[153],"8":[155],"different":[156],"verticals,":[157],"show":[159],"beats":[162],"previous":[164],"state":[165],"art":[168],"nearly":[170],"3.7":[171],"F1":[172],"points":[173],"average":[175],"rendered":[180],"pages":[181],"features.":[185]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":6}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2020-09-01T00:00:00"}
