{"id":"https://openalex.org/W4417462373","doi":"https://doi.org/10.1145/3786167.3788414","title":"Beyond Task Completion: An Assessment Framework for Evaluating Agentic AI Systems","display_name":"Beyond Task Completion: An Assessment Framework for Evaluating Agentic AI Systems","publication_year":2026,"publication_date":"2026-04-12","ids":{"openalex":"https://openalex.org/W4417462373","doi":"https://doi.org/10.1145/3786167.3788414"},"language":null,"primary_location":{"id":"doi:10.1145/3786167.3788414","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3786167.3788414","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Workshop on Agentic Engineering","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3786167.3788414","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5115902703","display_name":"Sreemaee Akshathala","orcid":null},"institutions":[{"id":"https://openalex.org/I65181880","display_name":"Indian Institute of Technology Hyderabad","ror":"https://ror.org/01j4v3x97","country_code":"IN","type":"education","lineage":["https://openalex.org/I65181880"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Sreemaee Akshathala","raw_affiliation_strings":["Software Engineering Research Center, IIIT Hyderabad, Hyderabad, Telangana, India"],"raw_orcid":"https://orcid.org/0009-0000-6474-4496","affiliations":[{"raw_affiliation_string":"Software Engineering Research Center, IIIT Hyderabad, Hyderabad, Telangana, India","institution_ids":["https://openalex.org/I65181880"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5116142621","display_name":"Bassam Adnan","orcid":null},"institutions":[{"id":"https://openalex.org/I65181880","display_name":"Indian Institute of Technology Hyderabad","ror":"https://ror.org/01j4v3x97","country_code":"IN","type":"education","lineage":["https://openalex.org/I65181880"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Bassam Adnan","raw_affiliation_strings":["Software Engineering Research Center, IIIT Hyderabad, Hyderabad, Telangana, India"],"raw_orcid":"https://orcid.org/0009-0003-0344-2111","affiliations":[{"raw_affiliation_string":"Software Engineering Research Center, IIIT Hyderabad, Hyderabad, Telangana, India","institution_ids":["https://openalex.org/I65181880"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Mahisha Ramesh","orcid":"https://orcid.org/0009-0004-0823-9048"},"institutions":[{"id":"https://openalex.org/I65181880","display_name":"Indian Institute of Technology Hyderabad","ror":"https://ror.org/01j4v3x97","country_code":"IN","type":"education","lineage":["https://openalex.org/I65181880"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Mahisha Ramesh","raw_affiliation_strings":["Software Engineering Research Center, IIIT Hyderabad, Hyderabad, Telangana, India"],"raw_orcid":"https://orcid.org/0009-0004-0823-9048","affiliations":[{"raw_affiliation_string":"Software Engineering Research Center, IIIT Hyderabad, Hyderabad, Telangana, India","institution_ids":["https://openalex.org/I65181880"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081488877","display_name":"Karthik Vaidhyanathan","orcid":"https://orcid.org/0000-0003-2317-6175"},"institutions":[{"id":"https://openalex.org/I65181880","display_name":"Indian Institute of Technology Hyderabad","ror":"https://ror.org/01j4v3x97","country_code":"IN","type":"education","lineage":["https://openalex.org/I65181880"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Karthik Vaidhyanathan","raw_affiliation_strings":["Software Engineering Research Center, IIIT Hyderabad, India, Hyderabad, Telangana, India"],"raw_orcid":"https://orcid.org/0000-0003-2317-6175","affiliations":[{"raw_affiliation_string":"Software Engineering Research Center, IIIT Hyderabad, India, Hyderabad, Telangana, India","institution_ids":["https://openalex.org/I65181880"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115902702","display_name":"Basil Muhammed","orcid":null},"institutions":[{"id":"https://openalex.org/I4210127127","display_name":"Bangalore Diabetes Centre","ror":"https://ror.org/033q83c72","country_code":"IN","type":"healthcare","lineage":["https://openalex.org/I4210127127"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Basil Muhammed","raw_affiliation_strings":["MontyCloud, Bangalore, Karnataka, India"],"raw_orcid":"https://orcid.org/0009-0005-0683-5636","affiliations":[{"raw_affiliation_string":"MontyCloud, Bangalore, Karnataka, India","institution_ids":["https://openalex.org/I4210127127"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033292576","display_name":"Kannan Parthasarathy","orcid":null},"institutions":[{"id":"https://openalex.org/I4210127127","display_name":"Bangalore Diabetes Centre","ror":"https://ror.org/033q83c72","country_code":"IN","type":"healthcare","lineage":["https://openalex.org/I4210127127"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Kannan Parthasarathy","raw_affiliation_strings":["MontyCloud, Bangalore, Karnataka, India"],"raw_orcid":"https://orcid.org/0009-0001-2497-7639","affiliations":[{"raw_affiliation_string":"MontyCloud, Bangalore, Karnataka, India","institution_ids":["https://openalex.org/I4210127127"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.01520695,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"9","last_page":"17"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.4426000118255615,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.4426000118255615,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10703","display_name":"Business Process Modeling and Analysis","score":0.06300000101327896,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13643","display_name":"Artificial Intelligence in Law","score":0.05849999934434891,"subfield":{"id":"https://openalex.org/subfields/3320","display_name":"Political Science and International Relations"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6693000197410583},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.40549999475479126},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.3578999936580658},{"id":"https://openalex.org/keywords/software-system","display_name":"Software system","score":0.35179999470710754},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.34049999713897705},{"id":"https://openalex.org/keywords/software-agent","display_name":"Software agent","score":0.27900001406669617}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7055000066757202},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6693000197410583},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.40549999475479126},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.4043999910354614},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3885999917984009},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.37229999899864197},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.3578999936580658},{"id":"https://openalex.org/C149091818","wikidata":"https://www.wikidata.org/wiki/Q2429814","display_name":"Software system","level":3,"score":0.35179999470710754},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.34049999713897705},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.3188000023365021},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.30649998784065247},{"id":"https://openalex.org/C539667460","wikidata":"https://www.wikidata.org/wiki/Q2414942","display_name":"Management science","level":1,"score":0.2867000102996826},{"id":"https://openalex.org/C5894958","wikidata":"https://www.wikidata.org/wiki/Q2297769","display_name":"Software agent","level":2,"score":0.27900001406669617},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.27219998836517334},{"id":"https://openalex.org/C127627568","wikidata":"https://www.wikidata.org/wiki/Q1639361","display_name":"Sociotechnical system","level":2,"score":0.26820001006126404},{"id":"https://openalex.org/C41550386","wikidata":"https://www.wikidata.org/wiki/Q529909","display_name":"Multi-agent system","level":2,"score":0.2662999927997589},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.26019999384880066},{"id":"https://openalex.org/C47822265","wikidata":"https://www.wikidata.org/wiki/Q854457","display_name":"Complex system","level":2,"score":0.2538999915122986}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3786167.3788414","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3786167.3788414","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Workshop on Agentic Engineering","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2512.12791","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2512.12791","pdf_url":"https://arxiv.org/pdf/2512.12791","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2512.12791","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.12791","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.1145/3786167.3788414","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3786167.3788414","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Workshop on Agentic Engineering","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Recent":[0],"advances":[1],"in":[2,75],"agentic":[3,118],"AI":[4],"have":[5,72],"shifted":[6],"the":[7,60,76,92,126],"focus":[8],"from":[9],"standalone":[10],"Large":[11],"Language":[12],"Models":[13],"(LLMs)":[14],"to":[15,27,45,114,129],"integrated":[16],"systems":[17,62,89,119],"that":[18,112],"combine":[19],"LLMs":[20],"with":[21,137,143],"tools,":[22,131],"memory,":[23,135],"and":[24,38,54,59,133,140],"other":[25,138],"agents":[26,44,58],"perform":[28],"complex":[29,48],"tasks.":[30],"These":[31],"multi-agent":[32,61],"architectures":[33],"enable":[34],"coordinated":[35],"reasoning,":[36],"planning,":[37],"execution":[39],"across":[40],"diverse":[41],"domains,":[42],"allowing":[43],"collaboratively":[46],"automate":[47],"workflows.":[49],"Despite":[50],"these":[51],"advances,":[52],"evaluation":[53],"assessment":[55],"of":[56,95],"LLM":[57],"they":[63],"constitute":[64],"remain":[65],"a":[66],"fundamental":[67],"challenge.":[68],"Although":[69],"various":[70],"approaches":[71],"been":[73],"proposed":[74],"software":[77,83],"engineering":[78],"literature":[79],"for":[80,87],"evaluating":[81],"conventional":[82],"components,":[84],"existing":[85,105],"methods":[86],"AI-based":[88],"often":[90],"overlook":[91],"non-deterministic":[93],"nature":[94],"models.":[96],"This":[97],"non-determinism":[98],"introduces":[99],"behavioral":[100],"uncertainty":[101],"during":[102],"execution,":[103],"yet":[104],"evaluations":[106],"rely":[107],"on":[108],"binary":[109],"task-completion":[110],"metrics":[111],"fail":[113],"capture":[115],"it.":[116],"Evaluating":[117],"therefore":[120],"requires":[121],"examining":[122],"additional":[123],"dimensions,":[124],"including":[125],"agent\u2019s":[127],"ability":[128],"invoke":[130],"ingest":[132],"retrieve":[134],"collaborate":[136],"agents,":[139],"interact":[141],"effectively":[142],"its":[144],"environment.":[145]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-12-17T00:00:00"}
