{"id":"https://openalex.org/W4388955484","doi":"https://doi.org/10.1145/3624918.3625324","title":"Typos-aware Bottlenecked Pre-Training for Robust Dense Retrieval","display_name":"Typos-aware Bottlenecked Pre-Training for Robust Dense Retrieval","publication_year":2023,"publication_date":"2023-11-23","ids":{"openalex":"https://openalex.org/W4388955484","doi":"https://doi.org/10.1145/3624918.3625324"},"language":"en","primary_location":{"id":"doi:10.1145/3624918.3625324","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3624918.3625324","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Annual International ACM SIGIR Conference on Research and Development in Information Retrieval in the Asia Pacific Region","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5012958162","display_name":"Shengyao Zhuang","orcid":"https://orcid.org/0000-0002-6711-0955"},"institutions":[{"id":"https://openalex.org/I165143802","display_name":"The University of Queensland","ror":"https://ror.org/00rqy9422","country_code":"AU","type":"education","lineage":["https://openalex.org/I165143802"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Shengyao Zhuang","raw_affiliation_strings":["The University of Queensland, Australia"],"raw_orcid":"https://orcid.org/0000-0002-6711-0955","affiliations":[{"raw_affiliation_string":"The University of Queensland, Australia","institution_ids":["https://openalex.org/I165143802"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077262995","display_name":"Linjun Shou","orcid":"https://orcid.org/0000-0002-1050-7708"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Linjun Shou","raw_affiliation_strings":["Microsoft, China"],"raw_orcid":"https://orcid.org/0000-0002-1050-7708","affiliations":[{"raw_affiliation_string":"Microsoft, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062247330","display_name":"Jian Pei","orcid":"https://orcid.org/0000-0002-2200-8711"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jian Pei","raw_affiliation_strings":["Duke University, USA"],"raw_orcid":"https://orcid.org/0000-0002-2200-8711","affiliations":[{"raw_affiliation_string":"Duke University, USA","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101640051","display_name":"Ming Gong","orcid":"https://orcid.org/0000-0001-6140-7187"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ming Gong","raw_affiliation_strings":["Microsoft, China"],"raw_orcid":"https://orcid.org/0000-0001-6140-7187","affiliations":[{"raw_affiliation_string":"Microsoft, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085999236","display_name":"Houxing Ren","orcid":null},"institutions":[{"id":"https://openalex.org/I82880672","display_name":"Beihang University","ror":"https://ror.org/00wk2mp56","country_code":"CN","type":"education","lineage":["https://openalex.org/I82880672"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Houxing Ren","raw_affiliation_strings":["Beihang University, China"],"raw_orcid":"https://orcid.org/0000-0001-9750-1626","affiliations":[{"raw_affiliation_string":"Beihang University, China","institution_ids":["https://openalex.org/I82880672"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076031002","display_name":"Guido Zuccon","orcid":"https://orcid.org/0000-0003-0271-5563"},"institutions":[{"id":"https://openalex.org/I165143802","display_name":"The University of Queensland","ror":"https://ror.org/00rqy9422","country_code":"AU","type":"education","lineage":["https://openalex.org/I165143802"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Guido Zuccon","raw_affiliation_strings":["The University of Queensland, Australia"],"raw_orcid":"https://orcid.org/0000-0003-0271-5563","affiliations":[{"raw_affiliation_string":"The University of Queensland, Australia","institution_ids":["https://openalex.org/I165143802"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5060116992","display_name":"Daxin Jiang","orcid":"https://orcid.org/0000-0002-6657-5806"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Daxin Jiang","raw_affiliation_strings":["Microsoft, China"],"raw_orcid":"https://orcid.org/0000-0002-6657-5806","affiliations":[{"raw_affiliation_string":"Microsoft, China","institution_ids":["https://openalex.org/I4210113369"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5012958162"],"corresponding_institution_ids":["https://openalex.org/I165143802"],"apc_list":null,"apc_paid":null,"fwci":0.852,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.79408937,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"212","last_page":"222"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9911999702453613,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9907000064849854,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8172740340232849},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.6117842793464661},{"id":"https://openalex.org/keywords/spell","display_name":"Spell","score":0.5756441354751587},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5585934519767761},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5553973317146301},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.5298570990562439},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.5024352073669434},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4588819444179535},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4251604676246643},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.34422385692596436},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.13276126980781555}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8172740340232849},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.6117842793464661},{"id":"https://openalex.org/C2780957641","wikidata":"https://www.wikidata.org/wiki/Q1999796","display_name":"Spell","level":2,"score":0.5756441354751587},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5585934519767761},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5553973317146301},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.5298570990562439},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.5024352073669434},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4588819444179535},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4251604676246643},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.34422385692596436},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.13276126980781555},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3624918.3625324","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3624918.3625324","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Annual International ACM SIGIR Conference on Research and Development in Information Retrieval in the Asia Pacific Region","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1968927634","https://openalex.org/W1984565341","https://openalex.org/W2081058719","https://openalex.org/W2127838257","https://openalex.org/W2741195357","https://openalex.org/W2979826702","https://openalex.org/W3036320503","https://openalex.org/W3098468692","https://openalex.org/W3115462295","https://openalex.org/W3147292006","https://openalex.org/W3152887675","https://openalex.org/W3154670582","https://openalex.org/W3155895380","https://openalex.org/W3168875417","https://openalex.org/W3174367567","https://openalex.org/W3184918446","https://openalex.org/W3188983256","https://openalex.org/W3198691721","https://openalex.org/W3201233724","https://openalex.org/W3206455169","https://openalex.org/W3212725701","https://openalex.org/W3214779765","https://openalex.org/W4206121183","https://openalex.org/W4226325130","https://openalex.org/W4238430687","https://openalex.org/W4284669182","https://openalex.org/W4284697650","https://openalex.org/W4285605369","https://openalex.org/W4362686793"],"related_works":["https://openalex.org/W4287256643","https://openalex.org/W2745258745","https://openalex.org/W4296626305","https://openalex.org/W4254978807","https://openalex.org/W4287269020","https://openalex.org/W4307454463","https://openalex.org/W4287201819","https://openalex.org/W4252852831","https://openalex.org/W4394050964","https://openalex.org/W2551249631"],"abstract_inverted_index":{"Current":[0],"dense":[1],"retrievers":[2],"(DRs)":[3],"are":[4,39,51,71],"limited":[5],"in":[6,22,55],"their":[7,94],"ability":[8],"to":[9,74,84],"effectively":[10],"process":[11],"misspelled":[12,65,97],"queries,":[13],"which":[14],"constitute":[15],"a":[16],"significant":[17],"portion":[18],"of":[19,103],"query":[20],"traffic":[21],"commercial":[23],"search":[24],"engines.":[25],"The":[26],"main":[27],"issue":[28,87],"is":[29],"that":[30,102,105],"the":[31,56,75],"pre-trained":[32],"language":[33],"model-based":[34],"encoders":[35],"used":[36,58,77],"by":[37],"DRs":[38],"typically":[40,52],"trained":[41],"and":[42,63,80],"fine-tuned":[43],"using":[44],"clean,":[45],"well-curated":[46],"text":[47],"data.":[48],"Misspelled":[49],"queries":[50,66,98],"not":[53],"found":[54],"data":[57,76],"for":[59,78],"training":[60,79],"these":[61],"models,":[62],"thus":[64],"observed":[67],"at":[68],"inference":[69],"time":[70],"out-of-distribution":[72],"compared":[73],"fine-tuning.":[81],"Previous":[82],"efforts":[83],"address":[85],"this":[86],"have":[88],"focused":[89],"on":[90,96],"fine-tuning":[91],"strategies,":[92],"but":[93],"effectiveness":[95],"remains":[99],"lower":[100],"than":[101],"pipelines":[104],"employ":[106],"separate":[107],"state-of-the-art":[108],"spell-checking":[109],"components.":[110]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
