{"id":"https://openalex.org/W2472952455","doi":"https://doi.org/10.1145/2911451.2914688","title":"Improving Language Estimation with the Paragraph Vector Model for Ad-hoc Retrieval","display_name":"Improving Language Estimation with the Paragraph Vector Model for Ad-hoc Retrieval","publication_year":2016,"publication_date":"2016-07-07","ids":{"openalex":"https://openalex.org/W2472952455","doi":"https://doi.org/10.1145/2911451.2914688","mag":"2472952455"},"language":"en","primary_location":{"id":"doi:10.1145/2911451.2914688","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2911451.2914688","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 39th International ACM SIGIR conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5089655391","display_name":"Qingyao Ai","orcid":"https://orcid.org/0000-0002-5030-709X"},"institutions":[{"id":"https://openalex.org/I24603500","display_name":"University of Massachusetts Amherst","ror":"https://ror.org/0072zz521","country_code":"US","type":"education","lineage":["https://openalex.org/I24603500"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Qingyao Ai","raw_affiliation_strings":["University of Massachusetts Amherst, Amherst, MA, USA"],"affiliations":[{"raw_affiliation_string":"University of Massachusetts Amherst, Amherst, MA, USA","institution_ids":["https://openalex.org/I24603500"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100693653","display_name":"Yang Liu","orcid":"https://orcid.org/0000-0002-4393-1791"},"institutions":[{"id":"https://openalex.org/I24603500","display_name":"University of Massachusetts Amherst","ror":"https://ror.org/0072zz521","country_code":"US","type":"education","lineage":["https://openalex.org/I24603500"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Liu Yang","raw_affiliation_strings":["University of Massachusetts Amherst, Amherst, MA, USA"],"affiliations":[{"raw_affiliation_string":"University of Massachusetts Amherst, Amherst, MA, USA","institution_ids":["https://openalex.org/I24603500"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088621320","display_name":"Jiafeng Guo","orcid":"https://orcid.org/0000-0002-9509-8674"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiafeng Guo","raw_affiliation_strings":["Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I19820366"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5105659698","display_name":"W. Bruce Croft","orcid":"https://orcid.org/0000-0003-2391-9629"},"institutions":[{"id":"https://openalex.org/I24603500","display_name":"University of Massachusetts Amherst","ror":"https://ror.org/0072zz521","country_code":"US","type":"education","lineage":["https://openalex.org/I24603500"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"W. Bruce Croft","raw_affiliation_strings":["University of Massachusetts Amherst, Amherst, MA, USA"],"affiliations":[{"raw_affiliation_string":"University of Massachusetts Amherst, Amherst, MA, USA","institution_ids":["https://openalex.org/I24603500"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5089655391"],"corresponding_institution_ids":["https://openalex.org/I24603500"],"apc_list":null,"apc_paid":null,"fwci":13.2826,"has_fulltext":false,"cited_by_count":35,"citation_normalized_percentile":{"value":0.98715414,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"869","last_page":"872"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10286","display_name":"Information Retrieval and Search Behavior","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9921000003814697,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8535162806510925},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6919300556182861},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.6830857396125793},{"id":"https://openalex.org/keywords/overfitting","display_name":"Overfitting","score":0.6804324984550476},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6110644936561584},{"id":"https://openalex.org/keywords/paragraph","display_name":"Paragraph","score":0.5917174816131592},{"id":"https://openalex.org/keywords/word-embedding","display_name":"Word embedding","score":0.46217507123947144},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.4457992911338806},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.44528594613075256},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.4364749789237976},{"id":"https://openalex.org/keywords/perplexity","display_name":"Perplexity","score":0.43560630083084106},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.37223440408706665},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3371117115020752},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.25182217359542847},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.22477984428405762}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8535162806510925},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6919300556182861},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.6830857396125793},{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.6804324984550476},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6110644936561584},{"id":"https://openalex.org/C2777206241","wikidata":"https://www.wikidata.org/wiki/Q194431","display_name":"Paragraph","level":2,"score":0.5917174816131592},{"id":"https://openalex.org/C2777462759","wikidata":"https://www.wikidata.org/wiki/Q18395344","display_name":"Word embedding","level":3,"score":0.46217507123947144},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.4457992911338806},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.44528594613075256},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.4364749789237976},{"id":"https://openalex.org/C100279451","wikidata":"https://www.wikidata.org/wiki/Q372193","display_name":"Perplexity","level":3,"score":0.43560630083084106},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.37223440408706665},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3371117115020752},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.25182217359542847},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.22477984428405762},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2911451.2914688","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2911451.2914688","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 39th International ACM SIGIR conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.8199999928474426,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W941230081","https://openalex.org/W2022871093","https://openalex.org/W2024932032","https://openalex.org/W2042980227","https://openalex.org/W2055981215","https://openalex.org/W2068297964","https://openalex.org/W2093390569","https://openalex.org/W2125031621","https://openalex.org/W2130395434","https://openalex.org/W2131744502","https://openalex.org/W2136542423","https://openalex.org/W2153579005","https://openalex.org/W2163382007","https://openalex.org/W2250372124","https://openalex.org/W2604272474","https://openalex.org/W2949547296","https://openalex.org/W2950133940","https://openalex.org/W4206765718","https://openalex.org/W4240913316","https://openalex.org/W6624664881"],"related_works":["https://openalex.org/W2169518243","https://openalex.org/W2252095989","https://openalex.org/W4322096525","https://openalex.org/W2551914602","https://openalex.org/W4287323699","https://openalex.org/W4281893144","https://openalex.org/W2105076537","https://openalex.org/W2787311093","https://openalex.org/W2084531783","https://openalex.org/W2902731467"],"abstract_inverted_index":{"Incorporating":[0],"topic":[1,190],"level":[2],"estimation":[3],"into":[4,176],"language":[5,178,192],"models":[6,17],"has":[7],"been":[8],"shown":[9,40],"to":[10,75,81,95,136,164],"be":[11,125],"beneficial":[12],"for":[13,98],"information":[14,64],"retrieval":[15,21,65],"(IR)":[16],"such":[18,29],"as":[19,30],"cluster-based":[20],"and":[22,43,51,148,161],"LDA-based":[23],"document":[24,106,134],"representation.":[25],"Neural":[26],"embedding":[27],"models,":[28,34],"paragraph":[31],"vector":[32],"(PV)":[33],"on":[35],"the":[36,78,91,99,110,118,133,138,145,159,177,188],"other":[37],"hand":[38],"have":[39],"their":[41,61],"effectiveness":[42,62],"efficiency":[44],"in":[45,53,63],"learning":[46,146,154],"semantic":[47],"representations":[48],"of":[49,120],"documents":[50,142],"words":[52,122],"multiple":[54],"Natural":[55],"Language":[56],"Processing":[57],"(NLP)":[58],"tasks.":[59],"However,":[60],"is":[66],"mostly":[67],"unknown.":[68],"In":[69],"this":[70,172],"paper,":[71],"we":[72,103,129,150,181],"study":[73],"how":[74],"effectively":[76],"use":[77,104],"PV":[79,93,174],"model":[80,94,139,175],"improve":[82],"ad-hoc":[83],"retrieval.":[84],"We":[85],"propose":[86],"three":[87],"major":[88],"improvements":[89],"over":[90,132],"original":[92],"adapt":[96],"it":[97,184],"IR":[100],"scenario:":[101],"(1)":[102],"a":[105,152],"frequency-based":[107,112],"rather":[108],"than":[109],"corpus":[111],"negative":[113],"sampling":[114],"strategy":[115],"so":[116],"that":[117,183],"importance":[119],"frequent":[121],"will":[123],"not":[124],"suppressed":[126],"excessively;":[127],"(2)":[128],"introduce":[130],"regularization":[131],"representation":[135],"prevent":[137],"overfitting":[140],"short":[141],"along":[143],"with":[144],"iterations;":[147],"(3)":[149],"employ":[151],"joint":[153],"objective":[155],"which":[156],"considers":[157],"both":[158],"document-word":[160],"word-context":[162],"associations":[163],"produce":[165],"better":[166],"word":[167],"probability":[168],"estimation.":[169],"By":[170],"incorporating":[171],"enhanced":[173,191],"modeling":[179],"framework,":[180],"show":[182],"can":[185],"significantly":[186],"outperform":[187],"state-of-the-art":[189],"models.":[193]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":12},{"year":2018,"cited_by_count":5},{"year":2017,"cited_by_count":10},{"year":2016,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
