{"id":"https://openalex.org/W6888474649","doi":"https://doi.org/10.21227/vdm4-k186","title":"Replication Data for: Retrieval-Augmented Generation for Service Discovery: Chunking Strategies and Benchmarking","display_name":"Replication Data for: Retrieval-Augmented Generation for Service Discovery: Chunking Strategies and Benchmarking","publication_year":2025,"publication_date":"2025-03-07","ids":{"openalex":"https://openalex.org/W6888474649","doi":"https://doi.org/10.21227/vdm4-k186"},"language":"en","primary_location":{"id":"doi:10.21227/vdm4-k186","is_oa":true,"landing_page_url":"https://doi.org/10.21227/vdm4-k186","pdf_url":null,"source":{"id":"https://openalex.org/S7407051695","display_name":"IEEE DataPort","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"dataset"},"type":"dataset","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.21227/vdm4-k186","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Pesl, Robin D.","orcid":null},"institutions":[{"id":"https://openalex.org/I100066346","display_name":"University of Stuttgart","ror":"https://ror.org/04vnq7t77","country_code":"DE","type":"education","lineage":["https://openalex.org/I100066346"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Pesl, Robin D.","raw_affiliation_strings":["University of Stuttgart"],"affiliations":[{"raw_affiliation_string":"University of Stuttgart","institution_ids":["https://openalex.org/I100066346"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Mathew, Jerin G.","orcid":null},"institutions":[{"id":"https://openalex.org/I861853513","display_name":"Sapienza University of Rome","ror":"https://ror.org/02be6w209","country_code":"IT","type":"education","lineage":["https://openalex.org/I861853513"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Mathew, Jerin G.","raw_affiliation_strings":["Sapienza Universit\u00e0 di Roma"],"affiliations":[{"raw_affiliation_string":"Sapienza Universit\u00e0 di Roma","institution_ids":["https://openalex.org/I861853513"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Mecella, Massimo","orcid":null},"institutions":[{"id":"https://openalex.org/I861853513","display_name":"Sapienza University of Rome","ror":"https://ror.org/02be6w209","country_code":"IT","type":"education","lineage":["https://openalex.org/I861853513"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Mecella, Massimo","raw_affiliation_strings":["Sapienza Universit\u00e0 di Roma"],"affiliations":[{"raw_affiliation_string":"Sapienza Universit\u00e0 di Roma","institution_ids":["https://openalex.org/I861853513"]}]},{"author_position":"last","author":{"id":null,"display_name":"Aiello, Marco","orcid":null},"institutions":[{"id":"https://openalex.org/I100066346","display_name":"University of Stuttgart","ror":"https://ror.org/04vnq7t77","country_code":"DE","type":"education","lineage":["https://openalex.org/I100066346"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Aiello, Marco","raw_affiliation_strings":["University of Stuttgart"],"affiliations":[{"raw_affiliation_string":"University of Stuttgart","institution_ids":["https://openalex.org/I100066346"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I100066346"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":null,"topics":[],"keywords":[{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.6933000087738037},{"id":"https://openalex.org/keywords/chunking","display_name":"Chunking (psychology)","score":0.6694999933242798},{"id":"https://openalex.org/keywords/documentation","display_name":"Documentation","score":0.6399000287055969},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5737000107765198},{"id":"https://openalex.org/keywords/service-discovery","display_name":"Service discovery","score":0.5449000000953674},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.516700029373169}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8043000102043152},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.6933000087738037},{"id":"https://openalex.org/C203357204","wikidata":"https://www.wikidata.org/wiki/Q1089605","display_name":"Chunking (psychology)","level":2,"score":0.6694999933242798},{"id":"https://openalex.org/C56666940","wikidata":"https://www.wikidata.org/wiki/Q788790","display_name":"Documentation","level":2,"score":0.6399000287055969},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5737000107765198},{"id":"https://openalex.org/C144097018","wikidata":"https://www.wikidata.org/wiki/Q4329404","display_name":"Service discovery","level":3,"score":0.5449000000953674},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.516700029373169},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.49570000171661377},{"id":"https://openalex.org/C2780378061","wikidata":"https://www.wikidata.org/wiki/Q25351891","display_name":"Service (business)","level":2,"score":0.3806000053882599},{"id":"https://openalex.org/C12590798","wikidata":"https://www.wikidata.org/wiki/Q3933199","display_name":"Replication (statistics)","level":2,"score":0.36059999465942383},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3411000072956085},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.31060001254081726},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3102000057697296},{"id":"https://openalex.org/C120567893","wikidata":"https://www.wikidata.org/wiki/Q1582085","display_name":"Knowledge extraction","level":2,"score":0.27000001072883606},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.2653000056743622},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.25949999690055847},{"id":"https://openalex.org/C35578498","wikidata":"https://www.wikidata.org/wiki/Q193424","display_name":"Web service","level":2,"score":0.2547999918460846}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21227/vdm4-k186","is_oa":true,"landing_page_url":"https://doi.org/10.21227/vdm4-k186","pdf_url":null,"source":{"id":"https://openalex.org/S7407051695","display_name":"IEEE DataPort","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"dataset"}],"best_oa_location":{"id":"doi:10.21227/vdm4-k186","is_oa":true,"landing_page_url":"https://doi.org/10.21227/vdm4-k186","pdf_url":null,"source":{"id":"https://openalex.org/S7407051695","display_name":"IEEE DataPort","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"dataset"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Integrating":[0],"multiple":[1],"(sub-)systems":[2],"is":[3,82],"essential":[4],"to":[5,50,71,86,116,221,228,255],"create":[6],"advanced":[7],"Information":[8],"Systems.":[9],"Difficulties":[10],"mainly":[11],"arise":[12],"when":[13],"integrating":[14],"dynamic":[15],"environments,":[16],"e.g.,":[17],"the":[18,38,42,92,98,109,118,124,131,136,155,188,194,201,208,212,230,259],"integration":[19],"at":[20],"design":[21],"time":[22],"of":[23,41,53,100,113,154],"not":[24],"yet":[25],"existing":[26],"services.":[27],"This":[28],"has":[29],"been":[30],"traditionally":[31],"addressed":[32],"using":[33,171,211],"a":[34,146,152,173,181],"registry":[35],"that":[36,149,236],"provides":[37],"API":[39,78,89],"documentation":[40,65],"endpoints.":[43],"Large":[44],"Language":[45],"Models":[46],"(LLMs)":[47],"have":[48],"shown":[49],"be":[51],"capable":[52],"automatically":[54],"creating":[55],"system":[56],"integrations":[57],"(e.g.,":[58],"as":[59],"service":[60,176],"composition)":[61],"based":[62],"on":[63,163,246],"this":[64],"but":[66],"require":[67],"concise":[68],"input":[69,72,119,132],"due":[70],"token":[73,120,133,231],"limitations,":[74],"especially":[75],"regarding":[76],"comprehensive":[77],"descriptions.":[79,90],"Currently,":[80],"it":[81],"unknown":[83],"how":[84,220],"best":[85],"preprocess":[87],"these":[88],"In":[91],"present":[93],"work,":[94],"we":[95,143,206],"(i)":[96],"analyze":[97],"usage":[99],"Retrieval":[101],"Augmented":[102],"Generation":[103],"(RAG)":[104],"for":[105,135,168,193,225,243,261],"endpoint":[106,141,169,202,226],"discovery":[107,170,177,227],"and":[108,139,159,187,198],"chunking,":[110],"i.e.,":[111],"preprocessing,":[112],"state-of-practice":[114],"OpenAPIs":[115],"reduce":[117,130,229],"length":[121,134],"while":[122,252],"preserving":[123],"most":[125,156],"relevant":[126,157],"information.":[127],"To":[128],"further":[129,262],"composition":[137],"prompt":[138],"improve":[140],"retrieval,":[142],"propose":[144],"(ii)":[145],"Discovery":[147,209],"Agent":[148,210],"only":[150],"receives":[151],"summary":[153],"endpoints":[158],"retrieves":[160],"specification":[161],"details":[162],"demand.":[164],"We":[165],"evaluate":[166],"RAG":[167,224],"(iii)":[172],"proposed":[174],"novel":[175],"benchmark":[178],"SOCBench-D":[179],"representing":[180],"general":[182],"setting":[183],"across":[184],"numerous":[185],"domains":[186],"real-world":[189],"RestBench":[190],"benchmark,":[191],"first,":[192],"different":[195],"chunking":[196,241],"possibilities":[197],"parameters":[199],"measuring":[200],"retrieval":[203],"accuracy.":[204],"Then,":[205],"assess":[207],"same":[213],"test":[214],"data":[215],"set.":[216],"The":[217],"prototype":[218],"shows":[219],"successfully":[222],"employ":[223],"count.":[232],"Our":[233],"experiments":[234],"show":[235],"endpoint-based":[237],"approaches":[238],"outperform":[239],"naive":[240],"methods":[242],"preprocessing.":[244],"Relying":[245],"an":[247],"agent":[248],"significantly":[249],"improves":[250],"precision":[251],"being":[253],"prone":[254],"decrease":[256],"recall,":[257],"disclosing":[258],"need":[260],"reasoning":[263],"capabilities.":[264]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
