{"id":"https://openalex.org/W2883670913","doi":"https://doi.org/10.1145/3234944.3234964","title":"Generating Synthetic Data for Neural Keyword-to-Question Models","display_name":"Generating Synthetic Data for Neural Keyword-to-Question Models","publication_year":2018,"publication_date":"2018-09-10","ids":{"openalex":"https://openalex.org/W2883670913","doi":"https://doi.org/10.1145/3234944.3234964","mag":"2883670913"},"language":"en","primary_location":{"id":"doi:10.1145/3234944.3234964","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3234944.3234964","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2018 ACM SIGIR International Conference on Theory of Information Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1807.05324","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Heng Ding","orcid":null},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Heng Ding","raw_affiliation_strings":["Wuhan University, Wuhan, China"],"affiliations":[{"raw_affiliation_string":"Wuhan University, Wuhan, China","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"last","author":{"id":null,"display_name":"Krisztian Balog","orcid":null},"institutions":[{"id":"https://openalex.org/I92008406","display_name":"University of Stavanger","ror":"https://ror.org/02qte9q33","country_code":"NO","type":"education","lineage":["https://openalex.org/I92008406"]}],"countries":["NO"],"is_corresponding":false,"raw_author_name":"Krisztian Balog","raw_affiliation_strings":["University of Stavanger, Stavanger, Norway"],"affiliations":[{"raw_affiliation_string":"University of Stavanger, Stavanger, Norway","institution_ids":["https://openalex.org/I92008406"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I37461747"],"apc_list":null,"apc_paid":null,"fwci":0.5077,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.73702912,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"51","last_page":"58"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.6802999973297119},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6392999887466431},{"id":"https://openalex.org/keywords/synthetic-data","display_name":"Synthetic data","score":0.6111999750137329},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.5996000170707703},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5724999904632568},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.5493000149726868},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4758000075817108},{"id":"https://openalex.org/keywords/data-set","display_name":"Data set","score":0.4375},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.41100001335144043}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8503000140190125},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7206000089645386},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.6802999973297119},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6392999887466431},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.6111999750137329},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.5996000170707703},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5724999904632568},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.5493000149726868},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5008999705314636},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4758000075817108},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4731999933719635},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.4375},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.41100001335144043},{"id":"https://openalex.org/C2776187449","wikidata":"https://www.wikidata.org/wiki/Q1513879","display_name":"Natural language generation","level":3,"score":0.39309999346733093},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.3481000065803528},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.33820000290870667},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.3375999927520752},{"id":"https://openalex.org/C2781170535","wikidata":"https://www.wikidata.org/wiki/Q30587856","display_name":"Noisy data","level":2,"score":0.32339999079704285},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.3111000061035156},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3034000098705292},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.30329999327659607},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.2930000126361847},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2734000086784363},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.26179999113082886},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.25870001316070557},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.25450000166893005}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3234944.3234964","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3234944.3234964","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2018 ACM SIGIR International Conference on Theory of Information Retrieval","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1807.05324","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1807.05324","pdf_url":"https://arxiv.org/pdf/1807.05324","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1807.05324","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1807.05324","pdf_url":"https://arxiv.org/pdf/1807.05324","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W2053154970","https://openalex.org/W2070740689","https://openalex.org/W2090243146","https://openalex.org/W2101105183","https://openalex.org/W2126690248","https://openalex.org/W2149858260","https://openalex.org/W2153338211","https://openalex.org/W2156982127","https://openalex.org/W2159665776","https://openalex.org/W2341569833","https://openalex.org/W2587741066"],"related_works":[],"abstract_inverted_index":{"Search":[0],"typically":[1],"relies":[2],"on":[3,24],"keyword":[4,26,102],"queries,":[5,27],"but":[6],"these":[7],"are":[8,89],"often":[9],"semantically":[10],"ambiguous.":[11],"We":[12,120],"propose":[13],"to":[14,28,68,97,110],"overcome":[15],"this":[16,59,65],"by":[17],"offering":[18],"users":[19],"natural":[20,86],"language":[21,87],"questions,":[22],"based":[23],"their":[25,30],"disambiguate":[29],"intent.":[31],"This":[32],"keyword-to-question":[33],"task":[34],"may":[35],"be":[36],"addressed":[37],"using":[38,127],"neural":[39],"machine":[40],"translation":[41,44],"techniques.":[42],"Neural":[43],"models,":[45],"however,":[46],"require":[47],"massive":[48],"amounts":[49,71],"of":[50,64,72,81,117,124],"training":[51,74,114],"data":[52,75,115],"(keyword-question":[53],"pairs),":[54],"which":[55],"is":[56,67,116],"unavailable":[57],"for":[58],"task.":[60],"The":[61],"main":[62],"idea":[63],"paper":[66],"generate":[69,99],"large":[70,92],"synthetic":[73,113],"from":[76],"a":[77],"small":[78],"seed":[79],"set":[80],"hand-labeled":[82],"keyword-question":[83],"pairs.":[84],"Since":[85],"questions":[88],"available":[90],"in":[91],"quantities,":[93],"we":[94,105],"develop":[95],"models":[96],"automatically":[98],"the":[100,122],"corresponding":[101],"queries.":[103],"Further,":[104],"introduce":[106],"various":[107],"filtering":[108],"mechanisms":[109],"ensure":[111],"that":[112],"high":[118],"quality.":[119],"demonstrate":[121],"feasibility":[123],"our":[125],"approach":[126],"both":[128],"automatic":[129],"and":[130],"manual":[131],"evaluation.":[132]},"counts_by_year":[{"year":2020,"cited_by_count":3}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2018-08-03T00:00:00"}
