{"id":"https://openalex.org/W3019932981","doi":"https://doi.org/10.1145/3340531.3411908","title":"Beyond 512 Tokens: Siamese Multi-depth Transformer-based Hierarchical Encoder for Long-Form Document Matching","display_name":"Beyond 512 Tokens: Siamese Multi-depth Transformer-based Hierarchical Encoder for Long-Form Document Matching","publication_year":2020,"publication_date":"2020-10-19","ids":{"openalex":"https://openalex.org/W3019932981","doi":"https://doi.org/10.1145/3340531.3411908","mag":"3019932981"},"language":"en","primary_location":{"id":"doi:10.1145/3340531.3411908","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3340531.3411908","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3340531.3411908","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Information &amp; Knowledge Management","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3340531.3411908","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100355692","display_name":"Yang Liu","orcid":"https://orcid.org/0000-0001-7300-9215"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Liu Yang","raw_affiliation_strings":["Google Research, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Google Research, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100432821","display_name":"Mingyang Zhang","orcid":"https://orcid.org/0000-0002-9768-516X"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mingyang Zhang","raw_affiliation_strings":["Google Research, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Google Research, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100354301","display_name":"Cheng Li","orcid":"https://orcid.org/0000-0003-4664-5894"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Cheng Li","raw_affiliation_strings":["Google Research, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Google Research, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032248436","display_name":"Michael Bendersky","orcid":"https://orcid.org/0000-0002-2941-6240"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michael Bendersky","raw_affiliation_strings":["Google Research, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Google Research, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5037200145","display_name":"Marc Najork","orcid":"https://orcid.org/0000-0003-1423-0854"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Marc Najork","raw_affiliation_strings":["Google Research, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Google Research, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100355692"],"corresponding_institution_ids":["https://openalex.org/I1291425158"],"apc_list":null,"apc_paid":null,"fwci":6.6732,"has_fulltext":true,"cited_by_count":90,"citation_normalized_percentile":{"value":0.97594987,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1725","last_page":"1734"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7421112060546875},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.7016098499298096},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.639062762260437},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.15923145413398743},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.09530436992645264},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09386962652206421}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7421112060546875},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.7016098499298096},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.639062762260437},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.15923145413398743},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.09530436992645264},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09386962652206421},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3340531.3411908","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3340531.3411908","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3340531.3411908","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Information &amp; Knowledge Management","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2004.12297","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2004.12297","pdf_url":"https://arxiv.org/pdf/2004.12297","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"doi:10.1145/3340531.3411908","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3340531.3411908","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3340531.3411908","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Information &amp; Knowledge Management","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.8100000023841858,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3019932981.pdf","grobid_xml":"https://content.openalex.org/works/W3019932981.grobid-xml"},"referenced_works_count":67,"referenced_works":["https://openalex.org/W131533222","https://openalex.org/W143775383","https://openalex.org/W836999996","https://openalex.org/W1840435438","https://openalex.org/W1966443646","https://openalex.org/W2136189984","https://openalex.org/W2153579005","https://openalex.org/W2170245882","https://openalex.org/W2170738476","https://openalex.org/W2194775991","https://openalex.org/W2251818205","https://openalex.org/W2286300105","https://openalex.org/W2294860948","https://openalex.org/W2427527485","https://openalex.org/W2470673105","https://openalex.org/W2515565210","https://openalex.org/W2536015822","https://openalex.org/W2538374209","https://openalex.org/W2539671052","https://openalex.org/W2593864460","https://openalex.org/W2626778328","https://openalex.org/W2648699835","https://openalex.org/W2740258984","https://openalex.org/W2741321799","https://openalex.org/W2753634799","https://openalex.org/W2769216919","https://openalex.org/W2798392716","https://openalex.org/W2891416139","https://openalex.org/W2896457183","https://openalex.org/W2911997761","https://openalex.org/W2922386288","https://openalex.org/W2940744433","https://openalex.org/W2945127593","https://openalex.org/W2945918281","https://openalex.org/W2946567085","https://openalex.org/W2949650786","https://openalex.org/W2949989304","https://openalex.org/W2950193743","https://openalex.org/W2950813464","https://openalex.org/W2951359136","https://openalex.org/W2953084091","https://openalex.org/W2959673509","https://openalex.org/W2962739339","https://openalex.org/W2962785754","https://openalex.org/W2962854379","https://openalex.org/W2963403868","https://openalex.org/W2963681593","https://openalex.org/W2964110616","https://openalex.org/W2970597249","https://openalex.org/W2984315581","https://openalex.org/W2986922898","https://openalex.org/W2994673210","https://openalex.org/W2997517014","https://openalex.org/W2998108143","https://openalex.org/W3000514857","https://openalex.org/W3021494766","https://openalex.org/W3106298483","https://openalex.org/W3130740619","https://openalex.org/W3131922516","https://openalex.org/W3175111331","https://openalex.org/W4288024261","https://openalex.org/W4294170691","https://openalex.org/W4295838474","https://openalex.org/W4299585995","https://openalex.org/W4323654151","https://openalex.org/W4385245566","https://openalex.org/W6600291067"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Many":[0],"natural":[1],"language":[2,191,200],"processing":[3],"and":[4,38,61,68,81,238,270],"information":[5],"retrieval":[6],"problems":[7],"can":[8],"be":[9],"formalized":[10],"as":[11],"the":[12,88,111,128,132,165,183,197,225],"task":[13,89,193,202],"of":[14,90,115],"semantic":[15,176],"matching.":[16,92,142,281],"Existing":[17],"work":[18],"in":[19,87,194],"this":[20,124],"area":[21],"has":[22,51],"been":[23],"largely":[24],"focused":[25],"on":[26,209,278],"matching":[27,46,217],"between":[28,35,47],"short":[29,37,100],"texts":[30],"(e.g.,":[31,42],"question":[32],"answering),":[33],"or":[34,106],"a":[36,39,103,158,179,186,263,271],"long":[40],"text":[41,91,101,121,154,253],"ad-hoc":[43],"retrieval).":[44],"Semantic":[45],"long-form":[48,140,215,279],"documents,":[49],"which":[50],"many":[52],"important":[53],"applications":[54],"like":[55,79,102],"news":[56],"recommendation,":[57],"related":[58],"article":[59],"recommendation":[60],"document":[62,141,166,216,280],"clustering,":[63],"is":[64,247],"relatively":[65],"less":[66],"explored":[67],"needs":[69],"more":[70],"research":[71,277],"effort.":[72],"In":[73,123,169],"recent":[74],"years,":[75],"self-attention":[76,116,150],"based":[77,160,243,265],"models":[78,151,228],"Transformers":[80],"BERT":[82,242],"have":[83],"achieved":[84],"state-of-the-art":[85,227],"performance":[86],"These":[93],"models,":[94],"however,":[95],"are":[96],"still":[97],"limited":[98],"to":[99,110,119,148,163,171,196,241,249,257,274],"few":[104],"sentences":[105],"one":[107],"paragraph":[108],"due":[109],"quadratic":[112],"computational":[113],"complexity":[114],"with":[117,185],"respect":[118],"input":[120,252],"length.":[122],"paper,":[125],"we":[126,181],"address":[127],"issue":[129],"by":[130,204],"proposing":[131],"Siamese":[133],"Multi-depth":[134],"Transformer-based":[135],"Hierarchical":[136],"(SMITH)":[137],"Encoder":[138],"for":[139,152,214],"Our":[143,206],"model":[144,184,223,246,273],"contains":[145],"several":[146,210],"innovations":[147],"adapt":[149],"longer":[153],"input.":[155],"We":[156,259],"propose":[157],"transformer":[159],"hierarchical":[161,230,234],"encoder":[162],"capture":[164,173],"structure":[167],"information.":[168],"order":[170],"better":[172],"sentence":[174,189],"level":[175],"relations":[177],"within":[178],"document,":[180],"pre-train":[182],"novel":[187],"masked":[188,198],"block":[190],"modeling":[192,201],"addition":[195],"word":[199],"used":[203],"BERT.":[205,239],"experimental":[207],"results":[208],"benchmark":[211,266],"data":[212,267],"sets":[213],"show":[218],"that":[219],"our":[220,245],"proposed":[221],"SMITH":[222],"outperforms":[224],"previous":[226],"including":[229],"attention,":[231],"multi-depth":[232],"attention-based":[233],"recurrent":[235],"neural":[236],"network,":[237],"Comparing":[240],"baselines,":[244],"able":[248],"increase":[250],"maximum":[251],"length":[254],"from":[255],"512":[256],"2048.":[258],"will":[260],"open":[261],"source":[262],"Wikipedia":[264],"set,":[268],"code":[269],"pre-trained":[272],"accelerate":[275],"future":[276]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":14},{"year":2023,"cited_by_count":19},{"year":2022,"cited_by_count":21},{"year":2021,"cited_by_count":25},{"year":2020,"cited_by_count":3}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2020-05-01T00:00:00"}
