{"id":"https://openalex.org/W2090146924","doi":"https://doi.org/10.1007/s10115-003-0121-x","title":"Building Minority Language Corpora by Learning to Generate Web Search Queries","display_name":"Building Minority Language Corpora by Learning to Generate Web Search Queries","publication_year":2004,"publication_date":"2004-07-22","ids":{"openalex":"https://openalex.org/W2090146924","doi":"https://doi.org/10.1007/s10115-003-0121-x","mag":"2090146924"},"language":"en","primary_location":{"id":"doi:10.1007/s10115-003-0121-x","is_oa":false,"landing_page_url":"https://doi.org/10.1007/s10115-003-0121-x","pdf_url":null,"source":{"id":"https://openalex.org/S81770430","display_name":"Knowledge and Information Systems","issn_l":"0219-1377","issn":["0219-1377","0219-3116"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Knowledge and Information Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5081839926","display_name":"Rayid Ghani","orcid":"https://orcid.org/0000-0003-0235-1843"},"institutions":[{"id":"https://openalex.org/I1310439424","display_name":"Accenture (Switzerland)","ror":"https://ror.org/041r3e346","country_code":"CH","type":"company","lineage":["https://openalex.org/I1310439424","https://openalex.org/I4210093804"]},{"id":"https://openalex.org/I4210099672","display_name":"Accenture (United States)","ror":"https://ror.org/013g16z83","country_code":"US","type":"company","lineage":["https://openalex.org/I4210093804","https://openalex.org/I4210099672"]}],"countries":["CH","US"],"is_corresponding":true,"raw_author_name":"Rayid Ghani","raw_affiliation_strings":["Accenture Technology Labs, 161 N. Clark St., Chicago, IL, 60601, USA","Accenture Technology Labs, 161 N. Clark St., 60601, Chicago, IL, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"Accenture Technology Labs, 161 N. Clark St., Chicago, IL, 60601, USA","institution_ids":["https://openalex.org/I4210099672"]},{"raw_affiliation_string":"Accenture Technology Labs, 161 N. Clark St., 60601, Chicago, IL, USA#TAB#","institution_ids":["https://openalex.org/I1310439424"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000992993","display_name":"Rosie Jones","orcid":"https://orcid.org/0009-0000-3821-1207"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rosie Jones","raw_affiliation_strings":["Carnegie Mellon University, Pittsburgh, 5000 Forbes Ave, PA, USA","Carnegie Mellon University, 5000 Forbes Ave, 60601, Pittsburgh, PA, USA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, 5000 Forbes Ave, PA, USA","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Carnegie Mellon University, 5000 Forbes Ave, 60601, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5063095022","display_name":"Dunja Mladeni\u0107","orcid":"https://orcid.org/0000-0002-0360-6505"},"institutions":[{"id":"https://openalex.org/I3006985408","display_name":"Jo\u017eef Stefan Institute","ror":"https://ror.org/05060sz93","country_code":"SI","type":"facility","lineage":["https://openalex.org/I3006985408"]}],"countries":["SI"],"is_corresponding":false,"raw_author_name":"Dunja Mladenic","raw_affiliation_strings":["J. Stefan Institute, Ljubljana, Jamova 39, 1000, Slovenia","J. Stefan Institute, Jamova 39, 1000, Ljubljana, PA, Slovenia#TAB#"],"affiliations":[{"raw_affiliation_string":"J. Stefan Institute, Ljubljana, Jamova 39, 1000, Slovenia","institution_ids":["https://openalex.org/I3006985408"]},{"raw_affiliation_string":"J. Stefan Institute, Jamova 39, 1000, Ljubljana, PA, Slovenia#TAB#","institution_ids":["https://openalex.org/I3006985408"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5081839926"],"corresponding_institution_ids":["https://openalex.org/I1310439424","https://openalex.org/I4210099672"],"apc_list":{"value":2290,"currency":"EUR","value_usd":2890},"apc_paid":null,"fwci":5.0834,"has_fulltext":false,"cited_by_count":35,"citation_normalized_percentile":{"value":0.95195015,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"7","issue":"1","first_page":"56","last_page":"83"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9937000274658203,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9768000245094299,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7822691202163696},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5379548668861389},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5290070176124573},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5043565034866333},{"id":"https://openalex.org/keywords/web-search-query","display_name":"Web search query","score":0.49444127082824707},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.42035791277885437},{"id":"https://openalex.org/keywords/search-engine","display_name":"Search engine","score":0.3266541361808777}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7822691202163696},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5379548668861389},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5290070176124573},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5043565034866333},{"id":"https://openalex.org/C164120249","wikidata":"https://www.wikidata.org/wiki/Q995982","display_name":"Web search query","level":3,"score":0.49444127082824707},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.42035791277885437},{"id":"https://openalex.org/C97854310","wikidata":"https://www.wikidata.org/wiki/Q19541","display_name":"Search engine","level":2,"score":0.3266541361808777}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1007/s10115-003-0121-x","is_oa":false,"landing_page_url":"https://doi.org/10.1007/s10115-003-0121-x","pdf_url":null,"source":{"id":"https://openalex.org/S81770430","display_name":"Knowledge and Information Systems","issn_l":"0219-1377","issn":["0219-1377","0219-3116"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Knowledge and Information Systems","raw_type":"journal-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.28.8831","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.28.8831","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cs.cmu.edu/~TextLearning/corpusbuilder/papers/CMU-CALD-01-100.ps.gz","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.58.3113","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.58.3113","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.69.9089","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.69.9089","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cs.cmu.edu/afs/cs/project/theo-4/text-learning/www/corpusbuilder/papers/CMU-CALD-01-100.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6499999761581421,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320334771","display_name":"Science and Engineering Research Board","ror":"https://ror.org/03ffdsr55"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W1485536830","https://openalex.org/W1485997076","https://openalex.org/W1507051419","https://openalex.org/W1508165687","https://openalex.org/W1533946607","https://openalex.org/W1549887922","https://openalex.org/W1987777228","https://openalex.org/W2006969979","https://openalex.org/W2009716188","https://openalex.org/W2029137225","https://openalex.org/W2118996379","https://openalex.org/W2124673015","https://openalex.org/W2129624222","https://openalex.org/W2137845970","https://openalex.org/W2145080939","https://openalex.org/W2167075392","https://openalex.org/W2435251607","https://openalex.org/W2586241512","https://openalex.org/W2998215494","https://openalex.org/W4247346926"],"related_works":["https://openalex.org/W1978230837","https://openalex.org/W2332278683","https://openalex.org/W167737004","https://openalex.org/W1964038241","https://openalex.org/W2185038817","https://openalex.org/W2004064649","https://openalex.org/W2385957133","https://openalex.org/W3590553","https://openalex.org/W2096432151","https://openalex.org/W1988452794"],"abstract_inverted_index":null,"counts_by_year":[{"year":2021,"cited_by_count":2},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":1},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":4},{"year":2012,"cited_by_count":5}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
