{"id":"https://openalex.org/W7148395335","doi":"https://doi.org/10.48550/arxiv.2604.00258","title":"Hierarchical Apprenticeship Learning from Imperfect Demonstrations with Evolving Rewards","display_name":"Hierarchical Apprenticeship Learning from Imperfect Demonstrations with Evolving Rewards","publication_year":2026,"publication_date":"2026-03-31","ids":{"openalex":"https://openalex.org/W7148395335","doi":"https://doi.org/10.48550/arxiv.2604.00258"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.00258","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.00258","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.00258","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5132818410","display_name":"Md Mirajul Islam","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Islam, Md Mirajul","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069656481","display_name":"Rajesh Debnath","orcid":"https://orcid.org/0000-0002-5460-9773"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Debnath, Rajesh","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080195943","display_name":"Adittya Soukarjya Saha","orcid":"https://orcid.org/0000-0001-6344-9663"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Saha, Adittya Soukarjya","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5132822926","display_name":"Min Chi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chi, Min","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5132818410"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11902","display_name":"Intelligent Tutoring Systems and Adaptive Learning","score":0.7940999865531921,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11902","display_name":"Intelligent Tutoring Systems and Adaptive Learning","score":0.7940999865531921,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10533","display_name":"Teaching and Learning Programming","score":0.03550000116229057,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11122","display_name":"Online Learning and Analytics","score":0.021800000220537186,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/imperfect","display_name":"Imperfect","score":0.873199999332428},{"id":"https://openalex.org/keywords/apprenticeship","display_name":"Apprenticeship","score":0.79830002784729},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6118999719619751},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.5884000062942505},{"id":"https://openalex.org/keywords/perfect-information","display_name":"Perfect information","score":0.4000999927520752},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.3887999951839447}],"concepts":[{"id":"https://openalex.org/C2780310539","wikidata":"https://www.wikidata.org/wiki/Q12547192","display_name":"Imperfect","level":2,"score":0.873199999332428},{"id":"https://openalex.org/C107806365","wikidata":"https://www.wikidata.org/wiki/Q253567","display_name":"Apprenticeship","level":2,"score":0.79830002784729},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6779999732971191},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6118999719619751},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.5884000062942505},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41190001368522644},{"id":"https://openalex.org/C123676819","wikidata":"https://www.wikidata.org/wiki/Q1074338","display_name":"Perfect information","level":2,"score":0.4000999927520752},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.3887999951839447},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.30630001425743103},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.30169999599456787},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.2939999997615814},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.28529998660087585},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.27790001034736633},{"id":"https://openalex.org/C2780799671","wikidata":"https://www.wikidata.org/wiki/Q17087362","display_name":"Transient (computer programming)","level":2,"score":0.2768000066280365},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.25619998574256897},{"id":"https://openalex.org/C145420912","wikidata":"https://www.wikidata.org/wiki/Q853077","display_name":"Mathematics education","level":1,"score":0.25060001015663147}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.00258","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.00258","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.00258","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.00258","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.5205841660499573,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"While":[0],"apprenticeship":[1],"learning":[2,103,153],"has":[3],"shown":[4],"promise":[5],"for":[6],"inducing":[7],"effective":[8],"pedagogical":[9,164],"policies":[10],"directly":[11],"from":[12,84,121,145],"student":[13,33,62,95,107,131,163],"interactions":[14],"in":[15],"e-learning":[16],"environments,":[17],"most":[18],"existing":[19],"approaches":[20,167],"rely":[21,169],"on":[22,170],"optimal":[23,171],"or":[24,175],"near-optimal":[25],"expert":[26],"demonstrations":[27,63],"under":[28],"a":[29,101],"fixed":[30,173],"reward.":[31],"Real-world":[32],"interactions,":[34],"however,":[35],"are":[36,64],"often":[37],"inherently":[38],"imperfect":[39,61,177],"and":[40,48,119,148],"evolving:":[41],"students":[42],"explore,":[43],"make":[44],"errors,":[45],"revise":[46],"strategies,":[47],"refine":[49],"their":[50,73],"goals":[51],"as":[52],"understanding":[53],"develops.":[54],"In":[55],"this":[56],"work,":[57],"we":[58],"argue":[59],"that":[60,158,168],"not":[65,91],"noise":[66],"to":[67],"be":[68],"discarded,":[69],"but":[70,97],"structured":[71],"signals-provided":[72],"relative":[74],"quality":[75,137],"is":[76],"ranked.":[77],"We":[78],"introduce":[79],"HALIDE,":[80],"Hierarchical":[81],"Apprenticeship":[82],"Learning":[83],"Imperfect":[85],"Demonstrations":[86],"with":[87],"Evolving":[88],"Rewards,":[89],"which":[90],"only":[92],"leverages":[93],"sub-optimal":[94],"demonstrations,":[96],"ranks":[98],"them":[99],"within":[100],"hierarchical":[102,139],"framework.":[104],"HALIDE":[105,159],"models":[106],"behavior":[108],"at":[109],"multiple":[110],"levels":[111],"of":[112,116,130],"abstraction,":[113],"enabling":[114],"inference":[115],"higher-level":[117,152],"intent":[118],"strategy":[120],"suboptimal":[122,146],"actions":[123],"while":[124],"explicitly":[125],"capturing":[126],"the":[127],"temporal":[128],"evolution":[129],"reward":[132,140],"functions.":[133],"By":[134],"integrating":[135],"demonstration":[136],"into":[138],"inference,HALIDE":[141],"distinguishes":[142],"transient":[143],"errors":[144],"strategies":[147],"meaningful":[149],"progress":[150],"toward":[151],"goals.":[154],"Our":[155],"results":[156],"show":[157],"more":[160],"accurately":[161],"predicts":[162],"decisions":[165],"than":[166],"trajectories,":[172],"rewards,":[174],"unranked":[176],"demonstrations.":[178]},"counts_by_year":[],"updated_date":"2026-04-30T09:15:22.047038","created_date":"2026-04-03T00:00:00"}
