{"id":"https://openalex.org/W4284669679","doi":"https://doi.org/10.1145/3477495.3531863","title":"InPars: Unsupervised Dataset Generation for Information Retrieval","display_name":"InPars: Unsupervised Dataset Generation for Information Retrieval","publication_year":2022,"publication_date":"2022-07-06","ids":{"openalex":"https://openalex.org/W4284669679","doi":"https://doi.org/10.1145/3477495.3531863"},"language":"en","primary_location":{"id":"doi:10.1145/3477495.3531863","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3477495.3531863","pdf_url":null,"source":{"id":"https://openalex.org/S4363608773","display_name":"Proceedings of the 45th International ACM SIGIR Conference on Research and Development in Information Retrieval","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 45th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5042382604","display_name":"Luiz Bonifacio","orcid":"https://orcid.org/0000-0003-0637-0187"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Luiz Bonifacio","raw_affiliation_strings":["Zeta Alpha, NeuralMind, &amp; University of Campinas, Amsterdam, Netherlands"],"affiliations":[{"raw_affiliation_string":"Zeta Alpha, NeuralMind, &amp; University of Campinas, Amsterdam, Netherlands","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043191034","display_name":"Hugo Abonizio","orcid":"https://orcid.org/0000-0001-5208-0290"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hugo Abonizio","raw_affiliation_strings":["Zeta Alpha &amp; NeuralMind, Amsterdam, Netherlands"],"affiliations":[{"raw_affiliation_string":"Zeta Alpha &amp; NeuralMind, Amsterdam, Netherlands","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070153551","display_name":"Marzieh Fadaee","orcid":"https://orcid.org/0000-0002-4447-1213"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Marzieh Fadaee","raw_affiliation_strings":["Zeta Alpha, Amsterdam, Netherlands"],"affiliations":[{"raw_affiliation_string":"Zeta Alpha, Amsterdam, Netherlands","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5030647281","display_name":"Rodrigo Nogueira","orcid":"https://orcid.org/0000-0002-2600-6035"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rodrigo Nogueira","raw_affiliation_strings":["Zeta Alpha, NeuralMind, University of Campinas, &amp; University of Waterloo, Amsterdam, Netherlands"],"affiliations":[{"raw_affiliation_string":"Zeta Alpha, NeuralMind, University of Campinas, &amp; University of Waterloo, Amsterdam, Netherlands","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5042382604"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":8.5566,"has_fulltext":false,"cited_by_count":87,"citation_normalized_percentile":{"value":0.98383678,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"2387","last_page":"2392"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.894753098487854},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6176137328147888},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5712593197822571},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.550075888633728},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5280975699424744},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.525867223739624},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5076931715011597},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.49951767921447754},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.4916391968727112},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.47475317120552063},{"id":"https://openalex.org/keywords/labeled-data","display_name":"Labeled data","score":0.44119706749916077},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4188939929008484},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.41812649369239807},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.41375842690467834},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.19284141063690186}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.894753098487854},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6176137328147888},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5712593197822571},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.550075888633728},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5280975699424744},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.525867223739624},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5076931715011597},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.49951767921447754},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.4916391968727112},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.47475317120552063},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.44119706749916077},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4188939929008484},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.41812649369239807},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.41375842690467834},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.19284141063690186},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3477495.3531863","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3477495.3531863","pdf_url":null,"source":{"id":"https://openalex.org/S4363608773","display_name":"Proceedings of the 45th International ACM SIGIR Conference on Research and Development in Information Retrieval","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 45th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W2000431947","https://openalex.org/W2096937925","https://openalex.org/W2489487449","https://openalex.org/W2610850660","https://openalex.org/W2622338386","https://openalex.org/W2740492458","https://openalex.org/W2741632195","https://openalex.org/W2798658104","https://openalex.org/W2945127593","https://openalex.org/W2981852735","https://openalex.org/W2998184481","https://openalex.org/W2998702515","https://openalex.org/W3021052948","https://openalex.org/W3035547741","https://openalex.org/W3098341425","https://openalex.org/W3099446234","https://openalex.org/W3100107515","https://openalex.org/W3134281122","https://openalex.org/W3156836409","https://openalex.org/W3172642864","https://openalex.org/W3175902990","https://openalex.org/W3180230246","https://openalex.org/W3205525806","https://openalex.org/W4205523161","https://openalex.org/W4288089799","https://openalex.org/W4404783242"],"related_works":["https://openalex.org/W4288267738","https://openalex.org/W2964413124","https://openalex.org/W4388937922","https://openalex.org/W3113264705","https://openalex.org/W2576964996","https://openalex.org/W2130553454","https://openalex.org/W3022007134","https://openalex.org/W4287644835","https://openalex.org/W4317548404","https://openalex.org/W2087783760"],"abstract_inverted_index":{"The":[0],"Information":[1],"Retrieval":[2],"(IR)":[3],"community":[4],"has":[5,31,59],"recently":[6,117],"witnessed":[7],"a":[8,69],"revolution":[9,21],"due":[10],"to":[11,36,68],"large":[12,87],"pretrained":[13,88],"transformer":[14],"models.":[15,77],"Another":[16],"key":[17],"ingredient":[18],"for":[19,95],"this":[20,79],"was":[22],"the":[23,73,83],"MS":[24],"MARCO":[25],"dataset,":[26],"whose":[27],"scale":[28],"and":[29,44,125],"diversity":[30],"enabled":[32],"zero-shot":[33],"transfer":[34],"learning":[35],"various":[37,56],"tasks.":[38,97],"However,":[39],"not":[40],"all":[41],"IR":[42,96],"tasks":[43,58],"domains":[45],"can":[46],"benefit":[47],"from":[48],"one":[49],"single":[50],"dataset":[51],"equally.":[52],"Extensive":[53],"research":[54],"in":[55],"NLP":[57],"shown":[60],"that":[61,100],"using":[62],"domain-specific":[63],"training":[64],"data,":[65],"as":[66,91,112,114,116],"opposed":[67],"general-purpose":[70],"one,":[71],"improves":[72],"performance":[74],"of":[75,86],"neural":[76],"In":[78],"work,":[80],"we":[81],"harness":[82],"few-shot":[84],"capabilities":[85],"language":[89],"models":[90,101],"synthetic":[92,106],"data":[93,126],"generators":[94],"We":[98],"show":[99],"finetuned":[102],"solely":[103],"on":[104],"our":[105],"datasets":[107],"outperform":[108],"strong":[109],"baselines":[110],"such":[111],"BM25":[113],"well":[115],"proposed":[118],"self-supervised":[119],"dense":[120],"retrieval":[121],"methods.":[122],"Code,":[123],"models,":[124],"are":[127],"available":[128],"at":[129],"https://github.com/zetaalphavector/inpars.":[130]},"counts_by_year":[{"year":2026,"cited_by_count":5},{"year":2025,"cited_by_count":34},{"year":2024,"cited_by_count":21},{"year":2023,"cited_by_count":27}],"updated_date":"2026-04-12T07:58:50.170612","created_date":"2025-10-10T00:00:00"}
