{"id":"https://openalex.org/W4396723634","doi":"https://doi.org/10.1145/3589334.3645707","title":"Towards Cross-Table Masked Pretraining for Web Data Mining","display_name":"Towards Cross-Table Masked Pretraining for Web Data Mining","publication_year":2024,"publication_date":"2024-05-08","ids":{"openalex":"https://openalex.org/W4396723634","doi":"https://doi.org/10.1145/3589334.3645707"},"language":"en","primary_location":{"id":"doi:10.1145/3589334.3645707","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3589334.3645707","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM Web Conference 2024","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102863976","display_name":"Chao Ye","orcid":"https://orcid.org/0009-0006-1356-8246"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Chao Ye","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086162908","display_name":"Guoshan Lu","orcid":"https://orcid.org/0000-0002-1732-9617"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guoshan Lu","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049707744","display_name":"Haobo Wang","orcid":"https://orcid.org/0000-0001-8586-3048"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haobo Wang","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101885231","display_name":"Liyao Li","orcid":"https://orcid.org/0009-0005-5235-1982"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liyao Li","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064646829","display_name":"Sai Wu","orcid":"https://orcid.org/0000-0002-1866-9197"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sai Wu","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100389286","display_name":"Gang Chen","orcid":"https://orcid.org/0000-0002-7483-0045"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Gang Chen","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5034520734","display_name":"Junbo Zhao","orcid":"https://orcid.org/0000-0002-3637-2936"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junbo Zhao","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5102863976"],"corresponding_institution_ids":["https://openalex.org/I76130692"],"apc_list":null,"apc_paid":null,"fwci":6.0145,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.96133641,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"4449","last_page":"4459"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9923999905586243,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7424271702766418},{"id":"https://openalex.org/keywords/table","display_name":"Table (database)","score":0.6886005401611328},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.4320544898509979},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3957226276397705},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.32153573632240295},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.29557135701179504}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7424271702766418},{"id":"https://openalex.org/C45235069","wikidata":"https://www.wikidata.org/wiki/Q278425","display_name":"Table (database)","level":2,"score":0.6886005401611328},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.4320544898509979},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3957226276397705},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.32153573632240295},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.29557135701179504}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3589334.3645707","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3589334.3645707","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM Web Conference 2024","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W2064186732","https://openalex.org/W2108223890","https://openalex.org/W2122646361","https://openalex.org/W2295598076","https://openalex.org/W2898085636","https://openalex.org/W2951559648","https://openalex.org/W2951621897","https://openalex.org/W2963541420","https://openalex.org/W2963832024","https://openalex.org/W3081168214","https://openalex.org/W3098903006","https://openalex.org/W3101704389","https://openalex.org/W3129639992","https://openalex.org/W3155299751","https://openalex.org/W3174082502","https://openalex.org/W3174086521","https://openalex.org/W3206529130","https://openalex.org/W4200630877","https://openalex.org/W4212774754","https://openalex.org/W4221009220","https://openalex.org/W4281826654","https://openalex.org/W4286908171","https://openalex.org/W4287114832","https://openalex.org/W4312804044","https://openalex.org/W4313156423","https://openalex.org/W4365456672","https://openalex.org/W4367047307","https://openalex.org/W4367047505","https://openalex.org/W4382318057","https://openalex.org/W4389524441","https://openalex.org/W4390874575","https://openalex.org/W6600213211","https://openalex.org/W6712758098"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4394360958"],"abstract_inverted_index":{"Tabular":[0],"data":[1,46,142],"pervades":[2],"the":[3,6,15,23,38,48,78,86,89,92,96,106,110,122,137,146,175],"landscape":[4],"of":[5,26,40,73,80,88,95],"World":[7],"Wide":[8],"Web,":[9],"playing":[10],"a":[11,53,81,150,157,180,195],"foundational":[12],"role":[13],"in":[14,77],"digital":[16],"architecture":[17],"that":[18,101,185,226],"underpins":[19],"online":[20],"information.":[21],"Given":[22],"recent":[24,63],"influence":[25],"large-scale":[27],"pretrained":[28],"models":[29],"like":[30],"ChatGPT":[31],"and":[32,91,166,193,224],"SAM":[33],"across":[34],"various":[35,231],"domains,":[36],"exploring":[37],"application":[39],"pretraining":[41,169,197,214,228],"techniques":[42],"for":[43,109],"mining":[44],"tabular":[45,112,141,160,182],"on":[47,116,215],"web":[49],"has":[50],"emerged":[51],"as":[52,127,172],"highly":[54],"promising":[55],"research":[56,124],"direction.":[57],"Indeed,":[58],"there":[59],"have":[60,103],"been":[61],"some":[62],"works":[64],"around":[65],"this":[66,117,132,153],"topic":[67],"where":[68,174],"most":[69],"(if":[70],"not":[71,104],"all)":[72],"them":[74],"are":[75],"limited":[76],"scope":[79],"fixed-schema/single":[82],"table.":[83],"Due":[84],"to":[85,177,212],"scale":[87],"dataset":[90],"parameter":[93],"size":[94],"prior":[97],"models,":[98],"we":[99,102,134],"believe":[100],"reached":[105],"''BERT":[107],"moment''":[108],"ubiquitous":[111],"data.":[113],"The":[114],"development":[115],"line":[118],"significantly":[119],"lags":[120],"behind":[121,140],"counterpart":[123],"domains":[125],"such":[126],"natural":[128],"language":[129],"processing.":[130],"In":[131],"work,":[133],"first":[135],"identify":[136],"crucial":[138],"challenges":[139],"pretraining,":[143],"particularly":[144],"overcoming":[145],"cross-table":[147,168,227],"hurdle.":[148],"As":[149],"pioneering":[151],"endeavor,":[152],"work":[154],"mainly":[155],"(i)-contributes":[156],"high-quality":[158],"real-world":[159],"dataset,":[161],"(ii)-proposes":[162],"an":[163],"innovative,":[164],"generic,":[165],"efficient":[167],"framework,":[170],"dubbed":[171],"CM2,":[173],"core":[176],"it":[178],"comprises":[179],"semantic-aware":[181],"neural":[183],"network":[184],"uniformly":[186],"encodes":[187],"heterogeneous":[188],"tables":[189],"without":[190],"much":[191],"restriction":[192],"(iii)-introduces":[194],"novel":[196],"objective":[198],"---":[199,205],"prompt":[200],"Masked":[201],"Table":[202],"Modeling":[203],"(pMTM)":[204],"inspired":[206],"by":[207],"NLP":[208],"but":[209],"intricately":[210],"tailored":[211],"scalable":[213],"tables.":[216],"Our":[217],"extensive":[218],"experiments":[219],"demonstrate":[220],"CM2's":[221],"state-of-the-art":[222],"performance":[223],"validate":[225],"can":[229],"enhance":[230],"downstream":[232],"tasks.":[233]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":2}],"updated_date":"2025-12-22T23:10:17.713674","created_date":"2025-10-10T00:00:00"}
