{"id":819,"date":"2020-01-05T19:07:58","date_gmt":"2020-01-05T19:07:58","guid":{"rendered":"https:\/\/elo-x.eu\/?p=819"},"modified":"2024-05-09T04:41:31","modified_gmt":"2024-05-09T04:41:31","slug":"yuan-zhang","status":"publish","type":"post","link":"https:\/\/elo-x.eu\/?p=819","title":{"rendered":"Yuan Zhang"},"content":{"rendered":"\t\t<div data-elementor-type=\"wp-post\" data-elementor-id=\"819\" class=\"elementor elementor-819\">\n\t\t\t\t\t\t<section class=\"elementor-section elementor-top-section elementor-element elementor-element-11ad091 elementor-section-boxed elementor-section-height-default elementor-section-height-default\" data-id=\"11ad091\" data-element_type=\"section\" data-e-type=\"section\">\n\t\t\t\t\t\t<div class=\"elementor-container elementor-column-gap-default\">\n\t\t\t\t\t<div class=\"elementor-column elementor-col-100 elementor-top-column elementor-element elementor-element-4645320\" data-id=\"4645320\" data-element_type=\"column\" data-e-type=\"column\">\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-9a05e75 elementor-widget elementor-widget-page-title\" data-id=\"9a05e75\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"page-title.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\n\t\t<div class=\"hfe-page-title hfe-page-title-wrapper elementor-widget-heading\">\n\n\t\t\t\t\t\t\t\t\t\t\t<a href=\"https:\/\/elo-x.eu\">\n\t\t\t\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">\n\t\t\t\t\t\t\t\t\n\t\t\t\tYuan Zhang  \n\t\t\t<\/h2 > \n\t\t\t\t\t\t\t\t\t<\/a>\n\t\t\t\t\t<\/div>\n\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-ca86f70 my-divider elementor-widget-divider--view-line elementor-widget elementor-widget-divider\" data-id=\"ca86f70\" data-element_type=\"widget\" data-e-type=\"widget\" 
data-widget_type=\"divider.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<div class=\"elementor-divider\">\n\t\t\t<span class=\"elementor-divider-separator\">\n\t\t\t\t\t\t<\/span>\n\t\t<\/div>\n\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-bf21411 elementor-widget elementor-widget-text-editor\" data-id=\"bf21411\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<p><span style=\"color: #352a87;\"><span style=\"font-size: 24px;\">PhD Candidate in\u00a0<\/span><\/span><span style=\"color: #352a87;\"><span style=\"font-size: 24px;\">Computer Science<\/span><\/span><\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-d04271b elementor-widget elementor-widget-text-editor\" data-id=\"d04271b\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<div style=\"width: 1120px; margin-bottom: 5px;\" data-id=\"571d48f\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\"><p><span style=\"color: #333333;\"><b>Neurobotics Lab, Department of Computer Science<\/b><\/span><\/p><p><span style=\"color: #333333;\"><b>University of Freiburg<\/b><\/span><\/p><\/div>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/section>\n\t\t\t\t<section class=\"elementor-section elementor-top-section elementor-element elementor-element-9fe98ef elementor-section-boxed elementor-section-height-default elementor-section-height-default\" data-id=\"9fe98ef\" data-element_type=\"section\" data-e-type=\"section\">\n\t\t\t\t\t\t<div class=\"elementor-container elementor-column-gap-default\">\n\t\t\t\t\t<div class=\"elementor-column elementor-col-50 elementor-top-column elementor-element 
elementor-element-6f52f17\" data-id=\"6f52f17\" data-element_type=\"column\" data-e-type=\"column\">\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-2ca30c2 elementor-widget elementor-widget-image\" data-id=\"2ca30c2\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"image.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<img fetchpriority=\"high\" decoding=\"async\" width=\"525\" height=\"350\" src=\"https:\/\/elo-x.eu\/wp-content\/uploads\/2021\/10\/IMG_3447-scaled-e1633460845172-1024x683.jpg\" class=\"attachment-large size-large wp-image-820\" alt=\"\" srcset=\"https:\/\/elo-x.eu\/wp-content\/uploads\/2021\/10\/IMG_3447-scaled-e1633460845172-1024x683.jpg 1024w, https:\/\/elo-x.eu\/wp-content\/uploads\/2021\/10\/IMG_3447-scaled-e1633460845172-300x200.jpg 300w, https:\/\/elo-x.eu\/wp-content\/uploads\/2021\/10\/IMG_3447-scaled-e1633460845172-768x512.jpg 768w, https:\/\/elo-x.eu\/wp-content\/uploads\/2021\/10\/IMG_3447-scaled-e1633460845172-1536x1025.jpg 1536w, https:\/\/elo-x.eu\/wp-content\/uploads\/2021\/10\/IMG_3447-scaled-e1633460845172.jpg 1707w\" sizes=\"100vw\" \/>\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t<div class=\"elementor-column elementor-col-50 elementor-top-column elementor-element elementor-element-4b93b98\" data-id=\"4b93b98\" data-element_type=\"column\" data-e-type=\"column\">\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-8d2ff49 elementor-widget elementor-widget-text-editor\" data-id=\"8d2ff49\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<p>Yuan Zhang received the B.Eng degree in Electronic Engineering from Tsinghua University in 
2017, and the M.Sc. degree in Machine Learning at University College London in 2018. After graduation, he came back to China and started to work on applying reinforcement learning in Natural Language Processing tasks including dialog policy learning and weak supervision learning in a startup called Laiye. His research interest lies in reinforcement learning, especially its applications in real-world scenarios (e.g. dialogue systems, games, robotics).<\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/section>\n\t\t\t\t<section class=\"elementor-section elementor-top-section elementor-element elementor-element-ed4818f elementor-section-boxed elementor-section-height-default elementor-section-height-default\" data-id=\"ed4818f\" data-element_type=\"section\" data-e-type=\"section\">\n\t\t\t\t\t\t<div class=\"elementor-container elementor-column-gap-default\">\n\t\t\t\t\t<div class=\"elementor-column elementor-col-100 elementor-top-column elementor-element elementor-element-dd425d4\" data-id=\"dd425d4\" data-element_type=\"column\" data-e-type=\"column\">\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-b4b3cdf elementor-widget__width-inherit elementor-widget elementor-widget-video\" data-id=\"b4b3cdf\" data-element_type=\"widget\" data-e-type=\"widget\" data-settings=\"{&quot;youtube_url&quot;:&quot;https:\\\/\\\/youtu.be\\\/ihWlCjYlWP8&quot;,&quot;video_type&quot;:&quot;youtube&quot;,&quot;controls&quot;:&quot;yes&quot;}\" data-widget_type=\"video.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<div class=\"elementor-wrapper elementor-open-inline\">\n\t\t\t<div class=\"elementor-video\"><\/div>\t\t<\/div>\n\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/section>\n\t\t\t\t<section class=\"elementor-section elementor-top-section elementor-element 
elementor-element-6009267 elementor-section-boxed elementor-section-height-default elementor-section-height-default\" data-id=\"6009267\" data-element_type=\"section\" data-e-type=\"section\">\n\t\t\t\t\t\t<div class=\"elementor-container elementor-column-gap-default\">\n\t\t\t\t\t<div class=\"elementor-column elementor-col-100 elementor-top-column elementor-element elementor-element-9156808\" data-id=\"9156808\" data-element_type=\"column\" data-e-type=\"column\">\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-67e3347 my-divider elementor-widget-divider--view-line elementor-widget elementor-widget-divider\" data-id=\"67e3347\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"divider.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<div class=\"elementor-divider\">\n\t\t\t<span class=\"elementor-divider-separator\">\n\t\t\t\t\t\t<\/span>\n\t\t<\/div>\n\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/section>\n\t\t\t\t<section class=\"elementor-section elementor-top-section elementor-element elementor-element-bcef1d3 elementor-section-boxed elementor-section-height-default elementor-section-height-default\" data-id=\"bcef1d3\" data-element_type=\"section\" data-e-type=\"section\">\n\t\t\t\t\t\t<div class=\"elementor-container elementor-column-gap-default\">\n\t\t\t\t\t<div class=\"elementor-column elementor-col-100 elementor-top-column elementor-element elementor-element-668d476\" data-id=\"668d476\" data-element_type=\"column\" data-e-type=\"column\">\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-0d3490a elementor-widget elementor-widget-heading\" data-id=\"0d3490a\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t<div 
class=\"elementor-widget-container\">\n\t\t\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">Project description<\/h2>\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-6887ee1 elementor-widget elementor-widget-text-editor\" data-id=\"6887ee1\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<p>Deep Learning has brought significant progress in a variety of applications of machine learning in recent years. As powerful non-linear function approximators, their potential for use in learning-based control applications is very appealing. They benefit from large amounts of data, and present a very scalable solution e.g. for learning hard-to-model plant dynamics from data. Currently, the most widely-used method of training these deep networks are maximum likelihood approaches, which only give a point estimate of the parameters that maximize the likelihood of the input data, and do not quantify how certain the model is about its predictions. The uncertainty of the model is, however, a crucial factor in robust and risk-averse control applications. This is especially important when the learned dynamics model is to be used to predict over a longer horizon, resulting in compounding errors of inaccurate models. Bayesian Deep Learning approaches offer a promising alternative that allows to quantify model uncertainty explicitly, but many current approaches are difficult to scale, have high computational overhead, and poorly calibrated uncertainties. 
The objective for the ESR in this project will be to develop new Bayesian Deep Learning approaches, including recurrent architectures, that address these issues and are well suited for embedded control applications with their challenging constraints on computational complexity, memory, and real-time demands.\u00a0<\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/section>\n\t\t\t\t<section class=\"elementor-section elementor-top-section elementor-element elementor-element-31ad7af elementor-section-boxed elementor-section-height-default elementor-section-height-default\" data-id=\"31ad7af\" data-element_type=\"section\" data-e-type=\"section\">\n\t\t\t\t\t\t<div class=\"elementor-container elementor-column-gap-default\">\n\t\t\t\t\t<div class=\"elementor-column elementor-col-100 elementor-top-column elementor-element elementor-element-47d1ac9\" data-id=\"47d1ac9\" data-element_type=\"column\" data-e-type=\"column\">\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-113d952 elementor-align-center elementor-widget elementor-widget-button\" data-id=\"113d952\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"button.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<div class=\"elementor-button-wrapper\">\n\t\t\t\t\t<a class=\"elementor-button elementor-button-link elementor-size-sm\" href=\"https:\/\/elo-x.eu\/?p=2559\">\n\t\t\t\t\t\t<span class=\"elementor-button-content-wrapper\">\n\t\t\t\t\t\t\t\t\t<span class=\"elementor-button-text\">Read more about this project<\/span>\n\t\t\t\t\t<\/span>\n\t\t\t\t\t<\/a>\n\t\t\t\t<\/div>\n\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/section>\n\t\t\t\t<section class=\"elementor-section elementor-top-section elementor-element elementor-element-736dc4a elementor-section-boxed 
elementor-section-height-default elementor-section-height-default\" data-id=\"736dc4a\" data-element_type=\"section\" data-e-type=\"section\">\n\t\t\t\t\t\t<div class=\"elementor-container elementor-column-gap-default\">\n\t\t\t\t\t<div class=\"elementor-column elementor-col-100 elementor-top-column elementor-element elementor-element-03da713\" data-id=\"03da713\" data-element_type=\"column\" data-e-type=\"column\">\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-89d6a8c my-divider elementor-widget-divider--view-line elementor-widget elementor-widget-divider\" data-id=\"89d6a8c\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"divider.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<div class=\"elementor-divider\">\n\t\t\t<span class=\"elementor-divider-separator\">\n\t\t\t\t\t\t<\/span>\n\t\t<\/div>\n\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-45eca99 elementor-widget elementor-widget-heading\" data-id=\"45eca99\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">Publications<\/h2>\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-dc8ede2 elementor-widget elementor-widget-shortcode\" data-id=\"dc8ede2\" data-element_type=\"widget\" data-e-type=\"widget\" data-widget_type=\"shortcode.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t<div class=\"elementor-shortcode\"><div class=\"teachpress_pub_list\"><form name=\"tppublistform\" method=\"get\"><a name=\"tppubs\" id=\"tppubs\"><\/a><div class=\"tp_search_input\"><input type=\"hidden\" name=\"p\" id=\"page_id\" value=\"819\"\/><input name=\"tsr\" id=\"tp_search_input_field\" type=\"search\" 
placeholder=\"Enter search word\" value=\"\" tabindex=\"1\"\/><div class=\"teachpress_search_button\"><input name=\"tps_button\" class=\"tp_search_button\" type=\"submit\" tabindex=\"10\" value=\"Search\"\/><\/div><\/div><\/form><div class=\"teachpress_publication_list\"><div class=\"tp_publication tp_publication_inproceedings\"><div class=\"tp_pub_number\">1.<\/div><div class=\"tp_pub_info\"><p class=\"tp_pub_author\"> Zhang, Yuan;  Hoffman, Jasper;  Boedecker, Joschka<\/p><p class=\"tp_pub_title\"><a class=\"tp_title_link\" onclick=\"teachpress_pub_showhide('83','tp_links')\" style=\"cursor:pointer;\">UDUC: An Uncertainty-driven Approach for Learning-based Robust Control<\/a> <span class=\"tp_pub_type tp_  inproceedings\">Proceedings Article<\/span> <\/p><p class=\"tp_pub_additional\"><span class=\"tp_pub_additional_in\">In: <\/span><span class=\"tp_pub_additional_booktitle\">ECAI 2024 - 27th European Conference on Artificial Intelligence - Including 13th Conference on Prestigious Applications of Intelligent Systems (PAIS 2024), <\/span><span class=\"tp_pub_additional_pages\">pp. 
4402-4409, <\/span><span class=\"tp_pub_additional_publisher\">IOS Press, <\/span><span class=\"tp_pub_additional_address\">Santiago de Compostela, Spain, <\/span><span class=\"tp_pub_additional_year\">2024<\/span>.<\/p><p class=\"tp_pub_menu\"><span class=\"tp_abstract_link\"><a id=\"tp_abstract_sh_83\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('83','tp_abstract')\" title=\"Show abstract\" style=\"cursor:pointer;\">Abstract<\/a><\/span> | <span class=\"tp_resource_link\"><a id=\"tp_links_sh_83\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('83','tp_links')\" title=\"Show links and resources\" style=\"cursor:pointer;\">Links<\/a><\/span> | <span class=\"tp_bibtex_link\"><a id=\"tp_bibtex_sh_83\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('83','tp_bibtex')\" title=\"Show BibTeX entry\" style=\"cursor:pointer;\">BibTeX<\/a><\/span><\/p><div class=\"tp_bibtex\" id=\"tp_bibtex_83\" style=\"display:none;\"><div class=\"tp_bibtex_entry\"><pre>@inproceedings{zhang2024uduc,<br \/>\r\ntitle = {UDUC: An Uncertainty-driven Approach for Learning-based Robust Control},<br \/>\r\nauthor = {Yuan Zhang and Jasper Hoffman and Joschka Boedecker},<br \/>\r\nurl = {https:\/\/arxiv.org\/abs\/2405.02598},<br \/>\r\ndoi = {10.3233\/FAIA241018},<br \/>\r\nyear  = {2024},<br \/>\r\ndate = {2024-10-24},<br \/>\r\nurldate = {2024-10-24},<br \/>\r\nbooktitle = {ECAI 2024 - 27th European Conference on Artificial Intelligence - Including 13th Conference on Prestigious Applications of Intelligent Systems (PAIS 2024)},<br \/>\r\nvolume = {392},<br \/>\r\npages = {4402-4409},<br \/>\r\npublisher = {IOS Press},<br \/>\r\naddress = {Santiago de Compostela, Spain},<br \/>\r\nseries = {Frontiers in Artificial Intelligence and Applications},<br \/>\r\nabstract = {Learning-based techniques have become popular in both model predictive control (MPC) and reinforcement learning (RL). 
Probabilistic ensemble (PE) models offer a promising approach for modelling system dynamics, showcasing the ability to capture uncertainty and scalability in high-dimensional control scenarios. However, PE models are susceptible to mode collapse, resulting in non-robust control when faced with environments slightly different from the training set. In this paper, we introduce the uncertainty-driven robust control (UDUC) loss as an alternative objective for training PE models, drawing inspiration from contrastive learning. We analyze the robustness of the UDUC loss through the lens of robust optimization and evaluate its performance on the challenging real-world reinforcement learning (RWRL) benchmark, which involves significant environmental mismatches between the training and testing environments.},<br \/>\r\nkeywords = {},<br \/>\r\npubstate = {published},<br \/>\r\ntppubtype = {inproceedings}<br \/>\r\n}<br \/>\r\n<\/pre><\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('83','tp_bibtex')\">Close<\/a><\/p><\/div><div class=\"tp_abstract\" id=\"tp_abstract_83\" style=\"display:none;\"><div class=\"tp_abstract_entry\">Learning-based techniques have become popular in both model predictive control (MPC) and reinforcement learning (RL). Probabilistic ensemble (PE) models offer a promising approach for modelling system dynamics, showcasing the ability to capture uncertainty and scalability in high-dimensional control scenarios. However, PE models are susceptible to mode collapse, resulting in non-robust control when faced with environments slightly different from the training set. In this paper, we introduce the uncertainty-driven robust control (UDUC) loss as an alternative objective for training PE models, drawing inspiration from contrastive learning. 
We analyze the robustness of the UDUC loss through the lens of robust optimization and evaluate its performance on the challenging real-world reinforcement learning (RWRL) benchmark, which involves significant environmental mismatches between the training and testing environments.<\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('83','tp_abstract')\">Close<\/a><\/p><\/div><div class=\"tp_links\" id=\"tp_links_83\" style=\"display:none;\"><div class=\"tp_links_entry\"><ul class=\"tp_pub_list\"><li><i class=\"ai ai-arxiv\"><\/i><a class=\"tp_pub_list\" href=\"https:\/\/arxiv.org\/abs\/2405.02598\" title=\"https:\/\/arxiv.org\/abs\/2405.02598\" target=\"_blank\">https:\/\/arxiv.org\/abs\/2405.02598<\/a><\/li><li><i class=\"ai ai-doi\"><\/i><a class=\"tp_pub_list\" href=\"https:\/\/dx.doi.org\/10.3233\/FAIA241018\" title=\"Follow DOI:10.3233\/FAIA241018\" target=\"_blank\">doi:10.3233\/FAIA241018<\/a><\/li><\/ul><\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('83','tp_links')\">Close<\/a><\/p><\/div><\/div><\/div><div class=\"tp_publication tp_publication_workshop\"><div class=\"tp_pub_number\">2.<\/div><div class=\"tp_pub_info\"><p class=\"tp_pub_author\"> Schulz, Felix;  Hoffman, Jasper;  Zhang, Yuan;  Boedecker, Joschka<\/p><p class=\"tp_pub_title\"><a class=\"tp_title_link\" onclick=\"teachpress_pub_showhide('104','tp_links')\" style=\"cursor:pointer;\">Learning When to Trust the Expert for Guided Exploration in RL<\/a> <span class=\"tp_pub_type tp_  workshop\">Workshop<\/span> <\/p><p class=\"tp_pub_additional\"><span class=\"tp_pub_additional_year\">2024<\/span><span class=\"tp_pub_additional_note\">, (ICML 2024 Workshop: Foundations of Reinforcement Learning and Control -- Connections and Perspectives)<\/span>.<\/p><p class=\"tp_pub_menu\"><span class=\"tp_abstract_link\"><a id=\"tp_abstract_sh_104\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('104','tp_abstract')\" 
title=\"Show abstract\" style=\"cursor:pointer;\">Abstract<\/a><\/span> | <span class=\"tp_resource_link\"><a id=\"tp_links_sh_104\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('104','tp_links')\" title=\"Show links and resources\" style=\"cursor:pointer;\">Links<\/a><\/span> | <span class=\"tp_bibtex_link\"><a id=\"tp_bibtex_sh_104\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('104','tp_bibtex')\" title=\"Show BibTeX entry\" style=\"cursor:pointer;\">BibTeX<\/a><\/span><\/p><div class=\"tp_bibtex\" id=\"tp_bibtex_104\" style=\"display:none;\"><div class=\"tp_bibtex_entry\"><pre>@workshop{schulz2024learning,<br \/>\r\ntitle = {Learning When to Trust the Expert for Guided Exploration in RL},<br \/>\r\nauthor = {Felix Schulz and Jasper Hoffman and Yuan Zhang and Joschka Boedecker },<br \/>\r\nurl = {https:\/\/openreview.net\/forum?id=QkTANn4mRa},<br \/>\r\nyear  = {2024},<br \/>\r\ndate = {2024-08-07},<br \/>\r\nurldate = {2025-08-07},<br \/>\r\nabstract = {Reinforcement learning (RL) algorithms often rely on trial and error for exploring environments, leading to local minima and high sample inefficiency during training. In many cases, leveraging prior knowledge can efficiently construct expert policies, e.g. model predictive control (MPC) techniques. However, the expert might not be optimal and thus, when used as a prior, might introduce bias that can harm the control performance. Thus, in this work, we propose a novel RL method based on a simple options framework that only uses the expert to guide the exploration during training. The exploration is controlled by a learned high-level policy that can decide to follow either an expert policy or a learned low-level policy. In that sense, the high-level skip policy learns when to trust the expert for exploration. 
As we aim at deploying the low-level policy without accessing the expert after training, we increasingly regularize the usage of the expert during training, to reduce the covariate shift problem. Using different environments combined with potentially sub-optimal experts derived from MPC or RL, we find that our method improves over sub-optimal experts and significantly improves the sample efficiency.},<br \/>\r\nnote = {ICML 2024 Workshop: Foundations of Reinforcement Learning and Control -- Connections and Perspectives},<br \/>\r\nkeywords = {},<br \/>\r\npubstate = {published},<br \/>\r\ntppubtype = {workshop}<br \/>\r\n}<br \/>\r\n<\/pre><\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('104','tp_bibtex')\">Close<\/a><\/p><\/div><div class=\"tp_abstract\" id=\"tp_abstract_104\" style=\"display:none;\"><div class=\"tp_abstract_entry\">Reinforcement learning (RL) algorithms often rely on trial and error for exploring environments, leading to local minima and high sample inefficiency during training. In many cases, leveraging prior knowledge can efficiently construct expert policies, e.g. model predictive control (MPC) techniques. However, the expert might not be optimal and thus, when used as a prior, might introduce bias that can harm the control performance. Thus, in this work, we propose a novel RL method based on a simple options framework that only uses the expert to guide the exploration during training. The exploration is controlled by a learned high-level policy that can decide to follow either an expert policy or a learned low-level policy. In that sense, the high-level skip policy learns when to trust the expert for exploration. As we aim at deploying the low-level policy without accessing the expert after training, we increasingly regularize the usage of the expert during training, to reduce the covariate shift problem. 
Using different environments combined with potentially sub-optimal experts derived from MPC or RL, we find that our method improves over sub-optimal experts and significantly improves the sample efficiency.<\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('104','tp_abstract')\">Close<\/a><\/p><\/div><div class=\"tp_links\" id=\"tp_links_104\" style=\"display:none;\"><div class=\"tp_links_entry\"><ul class=\"tp_pub_list\"><li><i class=\"fas fa-globe\"><\/i><a class=\"tp_pub_list\" href=\"https:\/\/openreview.net\/forum?id=QkTANn4mRa\" title=\"https:\/\/openreview.net\/forum?id=QkTANn4mRa\" target=\"_blank\">https:\/\/openreview.net\/forum?id=QkTANn4mRa<\/a><\/li><\/ul><\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('104','tp_links')\">Close<\/a><\/p><\/div><\/div><\/div><div class=\"tp_publication tp_publication_workingpaper\"><div class=\"tp_pub_number\">3.<\/div><div class=\"tp_pub_info\"><p class=\"tp_pub_author\"> Zhang, Yuan;  Yang, Shaohui;  Ohtsuka, Toshiyuki;  Jones, Colin;  Boedecker, Joschka<\/p><p class=\"tp_pub_title\"><a class=\"tp_title_link\" onclick=\"teachpress_pub_showhide('103','tp_links')\" style=\"cursor:pointer;\">Latent Linear Quadratic Regulator for Robotic Control Tasks<\/a> <span class=\"tp_pub_type tp_  workingpaper\">Working paper<\/span> <\/p><p class=\"tp_pub_additional\"><span class=\"tp_pub_additional_year\">2024<\/span><span class=\"tp_pub_additional_note\">, (RSS 2024 Workshop on Koopman Operators in Robotics)<\/span>.<\/p><p class=\"tp_pub_menu\"><span class=\"tp_abstract_link\"><a id=\"tp_abstract_sh_103\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('103','tp_abstract')\" title=\"Show abstract\" style=\"cursor:pointer;\">Abstract<\/a><\/span> | <span class=\"tp_resource_link\"><a id=\"tp_links_sh_103\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('103','tp_links')\" title=\"Show links and resources\" 
style=\"cursor:pointer;\">Links<\/a><\/span> | <span class=\"tp_bibtex_link\"><a id=\"tp_bibtex_sh_103\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('103','tp_bibtex')\" title=\"Show BibTeX entry\" style=\"cursor:pointer;\">BibTeX<\/a><\/span><\/p><div class=\"tp_bibtex\" id=\"tp_bibtex_103\" style=\"display:none;\"><div class=\"tp_bibtex_entry\"><pre>@workingpaper{zhang2024latent,<br \/>\r\ntitle = {Latent Linear Quadratic Regulator for Robotic Control Tasks},<br \/>\r\nauthor = {Yuan Zhang and Shaohui Yang and Toshiyuki Ohtsuka and Colin Jones and Joschka Boedecker},<br \/>\r\nurl = {https:\/\/arxiv.org\/abs\/2407.11107},<br \/>\r\nyear  = {2024},<br \/>\r\ndate = {2024-07-01},<br \/>\r\nurldate = {2024-07-01},<br \/>\r\nbooktitle = {RSS 2024 Workshop on Koopman Operators in Robotics},<br \/>\r\nabstract = {Model predictive control (MPC) has played a more crucial role in various robotic control tasks, but its high computational requirements are concerning, especially for nonlinear dynamical models. This paper presents a latent linear quadratic regulator (LaLQR) that maps the state space into a latent space, on which the dynamical model is linear and the cost function is quadratic, allowing the efficient application of LQR. We jointly learn this alternative system by imitating the original MPC. 
Experiments show LaLQR's superior efficiency and generalization compared to other baselines.},<br \/>\r\nnote = {RSS 2024 Workshop on Koopman Operators in Robotics},<br \/>\r\nkeywords = {},<br \/>\r\npubstate = {published},<br \/>\r\ntppubtype = {workingpaper}<br \/>\r\n}<br \/>\r\n<\/pre><\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('103','tp_bibtex')\">Close<\/a><\/p><\/div><div class=\"tp_abstract\" id=\"tp_abstract_103\" style=\"display:none;\"><div class=\"tp_abstract_entry\">Model predictive control (MPC) has played a more crucial role in various robotic control tasks, but its high computational requirements are concerning, especially for nonlinear dynamical models. This paper presents a latent linear quadratic regulator (LaLQR) that maps the state space into a latent space, on which the dynamical model is linear and the cost function is quadratic, allowing the efficient application of LQR. We jointly learn this alternative system by imitating the original MPC. 
Experiments show LaLQR's superior efficiency and generalization compared to other baselines.<\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('103','tp_abstract')\">Close<\/a><\/p><\/div><div class=\"tp_links\" id=\"tp_links_103\" style=\"display:none;\"><div class=\"tp_links_entry\"><ul class=\"tp_pub_list\"><li><i class=\"ai ai-arxiv\"><\/i><a class=\"tp_pub_list\" href=\"https:\/\/arxiv.org\/abs\/2407.11107\" title=\"https:\/\/arxiv.org\/abs\/2407.11107\" target=\"_blank\">https:\/\/arxiv.org\/abs\/2407.11107<\/a><\/li><\/ul><\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('103','tp_links')\">Close<\/a><\/p><\/div><\/div><\/div><div class=\"tp_publication tp_publication_inproceedings\"><div class=\"tp_pub_number\">4.<\/div><div class=\"tp_pub_info\"><p class=\"tp_pub_author\"> Zhang, Yuan;  Deekshith, Umashankar;  Wang, Jianhong;  Boedecker, Joschka<\/p><p class=\"tp_pub_title\"><a class=\"tp_title_link\" onclick=\"teachpress_pub_showhide('85','tp_links')\" style=\"cursor:pointer;\">LCPPO: An Efficient Multi-agent Reinforcement Learning Algorithm on Complex Railway Network<\/a> <span class=\"tp_pub_type tp_  inproceedings\">Proceedings Article<\/span> <\/p><p class=\"tp_pub_additional\"><span class=\"tp_pub_additional_in\">In: <\/span><span class=\"tp_pub_additional_booktitle\">34th International Conference on Automated Planning and Scheduling, <\/span><span class=\"tp_pub_additional_year\">2024<\/span>.<\/p><p class=\"tp_pub_menu\"><span class=\"tp_resource_link\"><a id=\"tp_links_sh_85\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('85','tp_links')\" title=\"Show links and resources\" style=\"cursor:pointer;\">Links<\/a><\/span> | <span class=\"tp_bibtex_link\"><a id=\"tp_bibtex_sh_85\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('85','tp_bibtex')\" title=\"Show BibTeX entry\" style=\"cursor:pointer;\">BibTeX<\/a><\/span><\/p><div class=\"tp_bibtex\" 
id=\"tp_bibtex_85\" style=\"display:none;\"><div class=\"tp_bibtex_entry\"><pre>@inproceedings{zhanglcppo,<br \/>\r\ntitle = {LCPPO: An Efficient Multi-agent Reinforcement Learning Algorithm on Complex Railway Network},<br \/>\r\nauthor = {Yuan Zhang and Umashankar Deekshith and Jianhong Wang and Joschka Boedecker},<br \/>\r\nurl = {https:\/\/openreview.net\/forum?id=gylH3hNASm},<br \/>\r\nyear  = {2024},<br \/>\r\ndate = {2024-05-09},<br \/>\r\nurldate = {2024-05-09},<br \/>\r\nbooktitle = {34th International Conference on Automated Planning and Scheduling},<br \/>\r\nkeywords = {},<br \/>\r\npubstate = {published},<br \/>\r\ntppubtype = {inproceedings}<br \/>\r\n}<br \/>\r\n<\/pre><\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('85','tp_bibtex')\">Close<\/a><\/p><\/div><div class=\"tp_links\" id=\"tp_links_85\" style=\"display:none;\"><div class=\"tp_links_entry\"><ul class=\"tp_pub_list\"><li><i class=\"fas fa-globe\"><\/i><a class=\"tp_pub_list\" href=\"https:\/\/openreview.net\/forum?id=gylH3hNASm\" title=\"https:\/\/openreview.net\/forum?id=gylH3hNASm\" target=\"_blank\">https:\/\/openreview.net\/forum?id=gylH3hNASm<\/a><\/li><\/ul><\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('85','tp_links')\">Close<\/a><\/p><\/div><\/div><\/div><div class=\"tp_publication tp_publication_inproceedings\"><div class=\"tp_pub_number\">5.<\/div><div class=\"tp_pub_info\"><p class=\"tp_pub_author\"> Wang, Jianhong;  Li, Yang;  Zhang, Yuan;  Pan, Wei;  Kaski, Samuel<\/p><p class=\"tp_pub_title\"><a class=\"tp_title_link\" onclick=\"teachpress_pub_showhide('84','tp_links')\" style=\"cursor:pointer;\">Open Ad Hoc Teamwork with Cooperative Game Theory<\/a> <span class=\"tp_pub_type tp_  inproceedings\">Proceedings Article<\/span> <\/p><p class=\"tp_pub_additional\"><span class=\"tp_pub_additional_in\">In: <\/span><span class=\"tp_pub_additional_booktitle\">Forty-first International Conference on 
Machine Learning, <\/span><span class=\"tp_pub_additional_year\">2024<\/span>.<\/p><p class=\"tp_pub_menu\"><span class=\"tp_resource_link\"><a id=\"tp_links_sh_84\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('84','tp_links')\" title=\"Show links and resources\" style=\"cursor:pointer;\">Links<\/a><\/span> | <span class=\"tp_bibtex_link\"><a id=\"tp_bibtex_sh_84\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('84','tp_bibtex')\" title=\"Show BibTeX entry\" style=\"cursor:pointer;\">BibTeX<\/a><\/span><\/p><div class=\"tp_bibtex\" id=\"tp_bibtex_84\" style=\"display:none;\"><div class=\"tp_bibtex_entry\"><pre>@inproceedings{wang2024open,<br \/>\r\ntitle = {Open Ad Hoc Teamwork with Cooperative Game Theory},<br \/>\r\nauthor = {Jianhong Wang and Yang Li and Yuan Zhang and Wei Pan and Samuel Kaski},<br \/>\r\nurl = {https:\/\/openreview.net\/forum?id=RlibRvH4B4},<br \/>\r\nyear  = {2024},<br \/>\r\ndate = {2024-05-09},<br \/>\r\nurldate = {2024-05-09},<br \/>\r\nbooktitle = {Forty-first International Conference on Machine Learning},<br \/>\r\nkeywords = {},<br \/>\r\npubstate = {published},<br \/>\r\ntppubtype = {inproceedings}<br \/>\r\n}<br \/>\r\n<\/pre><\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('84','tp_bibtex')\">Close<\/a><\/p><\/div><div class=\"tp_links\" id=\"tp_links_84\" style=\"display:none;\"><div class=\"tp_links_entry\"><ul class=\"tp_pub_list\"><li><i class=\"fas fa-globe\"><\/i><a class=\"tp_pub_list\" href=\"https:\/\/openreview.net\/forum?id=RlibRvH4B4\" title=\"https:\/\/openreview.net\/forum?id=RlibRvH4B4\" target=\"_blank\">https:\/\/openreview.net\/forum?id=RlibRvH4B4<\/a><\/li><\/ul><\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('84','tp_links')\">Close<\/a><\/p><\/div><\/div><\/div><div class=\"tp_publication tp_publication_inproceedings\"><div class=\"tp_pub_number\">6.<\/div><div class=\"tp_pub_info\"><p class=\"tp_pub_author\"> 
Yan, Shengchao;  Zhang, Yuan;  Zhang, Baohe;  Boedecker, Joschka;  Burgard, Wolfram<\/p><p class=\"tp_pub_title\"><a class=\"tp_title_link\" onclick=\"teachpress_pub_showhide('86','tp_links')\" style=\"cursor:pointer;\">Learning Continuous Control with Geometric Regularity from Robot Intrinsic Symmetry<\/a> <span class=\"tp_pub_type tp_  inproceedings\">Proceedings Article<\/span> <\/p><p class=\"tp_pub_additional\"><span class=\"tp_pub_additional_in\">In: <\/span><span class=\"tp_pub_additional_booktitle\">2024 IEEE International Conference on Robotics and Automation ICRA, <\/span><span class=\"tp_pub_additional_year\">2024<\/span>.<\/p><p class=\"tp_pub_menu\"><span class=\"tp_resource_link\"><a id=\"tp_links_sh_86\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('86','tp_links')\" title=\"Show links and resources\" style=\"cursor:pointer;\">Links<\/a><\/span> | <span class=\"tp_bibtex_link\"><a id=\"tp_bibtex_sh_86\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('86','tp_bibtex')\" title=\"Show BibTeX entry\" style=\"cursor:pointer;\">BibTeX<\/a><\/span><\/p><div class=\"tp_bibtex\" id=\"tp_bibtex_86\" style=\"display:none;\"><div class=\"tp_bibtex_entry\"><pre>@inproceedings{yan2023geometricb,<br \/>\r\ntitle = {Learning Continuous Control with Geometric Regularity from Robot Intrinsic Symmetry},<br \/>\r\nauthor = {Shengchao Yan and Yuan Zhang and Baohe Zhang and Joschka Boedecker and Wolfram Burgard},<br \/>\r\nurl = {https:\/\/arxiv.org\/abs\/2306.16316},<br \/>\r\nyear  = {2024},<br \/>\r\ndate = {2024-05-09},<br \/>\r\nurldate = {2024-05-09},<br \/>\r\nbooktitle = {2024 IEEE International Conference on Robotics and Automation ICRA},<br \/>\r\nkeywords = {},<br \/>\r\npubstate = {published},<br \/>\r\ntppubtype = {inproceedings}<br \/>\r\n}<br \/>\r\n<\/pre><\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('86','tp_bibtex')\">Close<\/a><\/p><\/div><div class=\"tp_links\" id=\"tp_links_86\" 
style=\"display:none;\"><div class=\"tp_links_entry\"><ul class=\"tp_pub_list\"><li><i class=\"ai ai-arxiv\"><\/i><a class=\"tp_pub_list\" href=\"https:\/\/arxiv.org\/abs\/2306.16316\" title=\"https:\/\/arxiv.org\/abs\/2306.16316\" target=\"_blank\">https:\/\/arxiv.org\/abs\/2306.16316<\/a><\/li><\/ul><\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('86','tp_links')\">Close<\/a><\/p><\/div><\/div><\/div><div class=\"tp_publication tp_publication_inproceedings\"><div class=\"tp_pub_number\">7.<\/div><div class=\"tp_pub_info\"><p class=\"tp_pub_author\"> Zhang, Yuan;  Wang, Jianhong;  Boedecker, Joschka<\/p><p class=\"tp_pub_title\"><a class=\"tp_title_link\" onclick=\"teachpress_pub_showhide('12','tp_links')\" style=\"cursor:pointer;\">Robust Reinforcement Learning in Continuous Control Tasks with Uncertainty Set Regularization<\/a> <span class=\"tp_pub_type tp_  inproceedings\">Proceedings Article<\/span> <\/p><p class=\"tp_pub_additional\"><span class=\"tp_pub_additional_in\">In: <\/span><span class=\"tp_pub_additional_booktitle\">7th Annual Conference on Robot Learning, <\/span><span class=\"tp_pub_additional_year\">2023<\/span>.<\/p><p class=\"tp_pub_menu\"><span class=\"tp_abstract_link\"><a id=\"tp_abstract_sh_12\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('12','tp_abstract')\" title=\"Show abstract\" style=\"cursor:pointer;\">Abstract<\/a><\/span> | <span class=\"tp_resource_link\"><a id=\"tp_links_sh_12\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('12','tp_links')\" title=\"Show links and resources\" style=\"cursor:pointer;\">Links<\/a><\/span> | <span class=\"tp_bibtex_link\"><a id=\"tp_bibtex_sh_12\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('12','tp_bibtex')\" title=\"Show BibTeX entry\" style=\"cursor:pointer;\">BibTeX<\/a><\/span><\/p><div class=\"tp_bibtex\" id=\"tp_bibtex_12\" style=\"display:none;\"><div class=\"tp_bibtex_entry\"><pre>@inproceedings{zhang2023robust,<br \/>\r\ntitle 
= {Robust Reinforcement Learning in Continuous Control Tasks with Uncertainty Set Regularization},<br \/>\r\nauthor = {Yuan Zhang and Jianhong Wang and Joschka Boedecker},<br \/>\r\nurl = {https:\/\/openreview.net\/forum?id=keAPCON4jHC},<br \/>\r\nyear  = {2023},<br \/>\r\ndate = {2023-10-16},<br \/>\r\nurldate = {2023-10-16},<br \/>\r\nbooktitle = {7th Annual Conference on Robot Learning},<br \/>\r\nabstract = {Reinforcement learning (RL) is recognized as lacking generalization and robustness under environmental perturbations, which excessively restricts its application for real-world robotics. Prior work claimed that adding regularization to the value function is equivalent to learning a robust policy under uncertain transitions. Although the regularization-robustness transformation is appealing for its simplicity and efficiency, it is still lacking in continuous control tasks. In this paper, we propose a new regularizer named Uncertainty Set Regularizer (USR), to formulate the uncertainty set on the parametric space of a transition function. To deal with unknown uncertainty sets, we further propose a novel adversarial approach to generate them based on the value function. We evaluate USR on the Real-world Reinforcement Learning (RWRL) benchmark and the Unitree A1 Robot, demonstrating improvements in the robust performance of perturbed testing environments and sim-to-real scenarios.},<br \/>\r\nkeywords = {},<br \/>\r\npubstate = {published},<br \/>\r\ntppubtype = {inproceedings}<br \/>\r\n}<br \/>\r\n<\/pre><\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('12','tp_bibtex')\">Close<\/a><\/p><\/div><div class=\"tp_abstract\" id=\"tp_abstract_12\" style=\"display:none;\"><div class=\"tp_abstract_entry\">Reinforcement learning (RL) is recognized as lacking generalization and robustness under environmental perturbations, which excessively restricts its application for real-world robotics. 
Prior work claimed that adding regularization to the value function is equivalent to learning a robust policy under uncertain transitions. Although the regularization-robustness transformation is appealing for its simplicity and efficiency, it is still lacking in continuous control tasks. In this paper, we propose a new regularizer named Uncertainty Set Regularizer (USR), to formulate the uncertainty set on the parametric space of a transition function. To deal with unknown uncertainty sets, we further propose a novel adversarial approach to generate them based on the value function. We evaluate USR on the Real-world Reinforcement Learning (RWRL) benchmark and the Unitree A1 Robot, demonstrating improvements in the robust performance of perturbed testing environments and sim-to-real scenarios.<\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('12','tp_abstract')\">Close<\/a><\/p><\/div><div class=\"tp_links\" id=\"tp_links_12\" style=\"display:none;\"><div class=\"tp_links_entry\"><ul class=\"tp_pub_list\"><li><i class=\"fas fa-globe\"><\/i><a class=\"tp_pub_list\" href=\"https:\/\/openreview.net\/forum?id=keAPCON4jHC\" title=\"https:\/\/openreview.net\/forum?id=keAPCON4jHC\" target=\"_blank\">https:\/\/openreview.net\/forum?id=keAPCON4jHC<\/a><\/li><\/ul><\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('12','tp_links')\">Close<\/a><\/p><\/div><\/div><\/div><div class=\"tp_publication tp_publication_workshop\"><div class=\"tp_pub_number\">8.<\/div><div class=\"tp_pub_info\"><p class=\"tp_pub_author\"> Yan, Shengchao;  Zhang, Yuan;  Zhang, Baohe;  Boedecker, Joschka;  Burgard, Wolfram<\/p><p class=\"tp_pub_title\"><a class=\"tp_title_link\" onclick=\"teachpress_pub_showhide('59','tp_links')\" style=\"cursor:pointer;\">Geometric Regularity with Robot Intrinsic Symmetry in Reinforcement Learning<\/a> <span class=\"tp_pub_type tp_  workshop\">Workshop<\/span> <\/p><p 
class=\"tp_pub_additional\"><span class=\"tp_pub_additional_booktitle\">RSS 2023 Workshop on Symmetries in Robot Learning, <\/span><span class=\"tp_pub_additional_year\">2023<\/span>.<\/p><p class=\"tp_pub_menu\"><span class=\"tp_abstract_link\"><a id=\"tp_abstract_sh_59\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('59','tp_abstract')\" title=\"Show abstract\" style=\"cursor:pointer;\">Abstract<\/a><\/span> | <span class=\"tp_resource_link\"><a id=\"tp_links_sh_59\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('59','tp_links')\" title=\"Show links and resources\" style=\"cursor:pointer;\">Links<\/a><\/span> | <span class=\"tp_bibtex_link\"><a id=\"tp_bibtex_sh_59\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('59','tp_bibtex')\" title=\"Show BibTeX entry\" style=\"cursor:pointer;\">BibTeX<\/a><\/span><\/p><div class=\"tp_bibtex\" id=\"tp_bibtex_59\" style=\"display:none;\"><div class=\"tp_bibtex_entry\"><pre>@workshop{yan2023geometric,<br \/>\r\ntitle = {Geometric Regularity with Robot Intrinsic Symmetry in Reinforcement Learning},<br \/>\r\nauthor = {Schengchao Yan and Yuan Zhang and Baohe Zhang and Joschka Boedecker and Wolfram Burgard},<br \/>\r\nurl = {https:\/\/doi.org\/10.48550\/arXiv.2306.16316},<br \/>\r\nyear  = {2023},<br \/>\r\ndate = {2023-06-28},<br \/>\r\nurldate = {2023-06-28},<br \/>\r\nbooktitle = {RSS 2023 Workshop on Symmetries in Robot Learning},<br \/>\r\nabstract = {Geometric regularity, which leverages data symmetry, has been successfully incorporated into deep learning architectures such as CNNs, RNNs, GNNs, and Transformers. While this concept has been widely applied in robotics to address the curse of dimensionality when learning from high-dimensional data, the inherent reflectional and rotational symmetry of robot structures has not been adequately explored. 
Drawing inspiration from cooperative multi-agent reinforcement learning, we introduce novel network structures for deep learning algorithms that explicitly capture this geometric regularity. Moreover, we investigate the relationship between the geometric prior and the concept of Parameter Sharing in multi-agent reinforcement learning. Through experiments conducted on various challenging continuous control tasks, we demonstrate the significant potential of the proposed geometric regularity in enhancing robot learning capabilities.},<br \/>\r\nkeywords = {},<br \/>\r\npubstate = {published},<br \/>\r\ntppubtype = {workshop}<br \/>\r\n}<br \/>\r\n<\/pre><\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('59','tp_bibtex')\">Close<\/a><\/p><\/div><div class=\"tp_abstract\" id=\"tp_abstract_59\" style=\"display:none;\"><div class=\"tp_abstract_entry\">Geometric regularity, which leverages data symmetry, has been successfully incorporated into deep learning architectures such as CNNs, RNNs, GNNs, and Transformers. While this concept has been widely applied in robotics to address the curse of dimensionality when learning from high-dimensional data, the inherent reflectional and rotational symmetry of robot structures has not been adequately explored. Drawing inspiration from cooperative multi-agent reinforcement learning, we introduce novel network structures for deep learning algorithms that explicitly capture this geometric regularity. Moreover, we investigate the relationship between the geometric prior and the concept of Parameter Sharing in multi-agent reinforcement learning. 
Through experiments conducted on various challenging continuous control tasks, we demonstrate the significant potential of the proposed geometric regularity in enhancing robot learning capabilities.<\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('59','tp_abstract')\">Close<\/a><\/p><\/div><div class=\"tp_links\" id=\"tp_links_59\" style=\"display:none;\"><div class=\"tp_links_entry\"><ul class=\"tp_pub_list\"><li><i class=\"fas fa-globe\"><\/i><a class=\"tp_pub_list\" href=\"https:\/\/doi.org\/10.48550\/arXiv.2306.16316\" title=\"https:\/\/doi.org\/10.48550\/arXiv.2306.16316\" target=\"_blank\">https:\/\/doi.org\/10.48550\/arXiv.2306.16316<\/a><\/li><\/ul><\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('59','tp_links')\">Close<\/a><\/p><\/div><\/div><\/div><div class=\"tp_publication tp_publication_workingpaper\"><div class=\"tp_pub_number\">9.<\/div><div class=\"tp_pub_info\"><p class=\"tp_pub_author\"> Zhang, Yuan;  Boedecker, Joschka;  Li, Chuxuan;  Zhou, Guyue<\/p><p class=\"tp_pub_title\"><a class=\"tp_title_link\" onclick=\"teachpress_pub_showhide('58','tp_links')\" style=\"cursor:pointer;\">Incorporating Recurrent Reinforcement Learning into Model Predictive Control for Adaptive Control in Autonomous Driving<\/a> <span class=\"tp_pub_type tp_  workingpaper\">Working paper<\/span> <\/p><p class=\"tp_pub_additional\"><span class=\"tp_pub_additional_year\">2023<\/span>.<\/p><p class=\"tp_pub_menu\"><span class=\"tp_abstract_link\"><a id=\"tp_abstract_sh_58\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('58','tp_abstract')\" title=\"Show abstract\" style=\"cursor:pointer;\">Abstract<\/a><\/span> | <span class=\"tp_resource_link\"><a id=\"tp_links_sh_58\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('58','tp_links')\" title=\"Show links and resources\" style=\"cursor:pointer;\">Links<\/a><\/span> | <span class=\"tp_bibtex_link\"><a id=\"tp_bibtex_sh_58\" 
class=\"tp_show\" onclick=\"teachpress_pub_showhide('58','tp_bibtex')\" title=\"Show BibTeX entry\" style=\"cursor:pointer;\">BibTeX<\/a><\/span><\/p><div class=\"tp_bibtex\" id=\"tp_bibtex_58\" style=\"display:none;\"><div class=\"tp_bibtex_entry\"><pre>@workingpaper{zhang2023incorporating,<br \/>\r\ntitle = {Incorporating Recurrent Reinforcement Learning into Model Predictive Control for Adaptive Control in Autonomous Driving},<br \/>\r\nauthor = {Yuan Zhang and Joschka Boedecker and Chuxuan Li and Guyue Zhou},<br \/>\r\ndoi = {https:\/\/doi.org\/10.48550\/arXiv.2301.13313},<br \/>\r\nyear  = {2023},<br \/>\r\ndate = {2023-04-27},<br \/>\r\nurldate = {2023-04-27},<br \/>\r\nabstract = {Model Predictive Control (MPC) is attracting tremendous attention in the autonomous driving task as a powerful control technique. The success of an MPC controller strongly depends on an accurate internal dynamics model. However, the static parameters, usually learned by system identification, often fail to adapt to both internal and external perturbations in real-world scenarios. 
In this paper, we firstly (1) reformulate the problem as a Partially Observed Markov Decision Process (POMDP) that absorbs the uncertainties into observations and maintains Markov property into hidden states; and (2) learn a recurrent policy continually adapting the parameters of the dynamics model via Recurrent Reinforcement Learning (RRL) for optimal and adaptive control; and (3) finally evaluate the proposed algorithm (referred as MPC-RRL) in CARLA simulator and leading to robust behaviours under a wide range of perturbations.},<br \/>\r\nkeywords = {},<br \/>\r\npubstate = {published},<br \/>\r\ntppubtype = {workingpaper}<br \/>\r\n}<br \/>\r\n<\/pre><\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('58','tp_bibtex')\">Close<\/a><\/p><\/div><div class=\"tp_abstract\" id=\"tp_abstract_58\" style=\"display:none;\"><div class=\"tp_abstract_entry\">Model Predictive Control (MPC) is attracting tremendous attention in the autonomous driving task as a powerful control technique. The success of an MPC controller strongly depends on an accurate internal dynamics model. However, the static parameters, usually learned by system identification, often fail to adapt to both internal and external perturbations in real-world scenarios. 
In this paper, we firstly (1) reformulate the problem as a Partially Observed Markov Decision Process (POMDP) that absorbs the uncertainties into observations and maintains Markov property into hidden states; and (2) learn a recurrent policy continually adapting the parameters of the dynamics model via Recurrent Reinforcement Learning (RRL) for optimal and adaptive control; and (3) finally evaluate the proposed algorithm (referred as MPC-RRL) in CARLA simulator and leading to robust behaviours under a wide range of perturbations.<\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('58','tp_abstract')\">Close<\/a><\/p><\/div><div class=\"tp_links\" id=\"tp_links_58\" style=\"display:none;\"><div class=\"tp_links_entry\"><ul class=\"tp_pub_list\"><li><i class=\"ai ai-doi\"><\/i><a class=\"tp_pub_list\" href=\"https:\/\/dx.doi.org\/https:\/\/doi.org\/10.48550\/arXiv.2301.13313\" title=\"Follow DOI:https:\/\/doi.org\/10.48550\/arXiv.2301.13313\" target=\"_blank\">doi:https:\/\/doi.org\/10.48550\/arXiv.2301.13313<\/a><\/li><\/ul><\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('58','tp_links')\">Close<\/a><\/p><\/div><\/div><\/div><div class=\"tp_publication tp_publication_inproceedings\"><div class=\"tp_pub_number\">10.<\/div><div class=\"tp_pub_info\"><p class=\"tp_pub_author\"> Wang, Jianhong;  Wang, Jinxin;  Zhang, Yuan;  Gu, Yunjie;  Kim, Tae-Kyun<\/p><p class=\"tp_pub_title\"><a class=\"tp_title_link\" onclick=\"teachpress_pub_showhide('13','tp_links')\" style=\"cursor:pointer;\">SHAQ: Incorporating Shapley Value Theory into Multi-Agent Q-Learning<\/a> <span class=\"tp_pub_type tp_  inproceedings\">Proceedings Article<\/span> <\/p><p class=\"tp_pub_additional\"><span class=\"tp_pub_additional_in\">In: <\/span><span class=\"tp_pub_additional_booktitle\">Advances in Neural Information Processing Systems, <\/span><span class=\"tp_pub_additional_year\">2022<\/span><span 
class=\"tp_pub_additional_note\">, (Accepted at NeurIPS 2022 Conference)<\/span>.<\/p><p class=\"tp_pub_menu\"><span class=\"tp_resource_link\"><a id=\"tp_links_sh_13\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('13','tp_links')\" title=\"Show links and resources\" style=\"cursor:pointer;\">Links<\/a><\/span> | <span class=\"tp_bibtex_link\"><a id=\"tp_bibtex_sh_13\" class=\"tp_show\" onclick=\"teachpress_pub_showhide('13','tp_bibtex')\" title=\"Show BibTeX entry\" style=\"cursor:pointer;\">BibTeX<\/a><\/span><\/p><div class=\"tp_bibtex\" id=\"tp_bibtex_13\" style=\"display:none;\"><div class=\"tp_bibtex_entry\"><pre>@inproceedings{wang2021shaq,<br \/>\r\ntitle = {SHAQ: Incorporating Shapley Value Theory into Multi-Agent Q-Learning},<br \/>\r\nauthor = {Jianhong Wang and Jinxin Wang and Yuan Zhang and Yunjie Gu and Tae-Kyun Kim},<br \/>\r\nurl = {https:\/\/openreview.net\/forum?id=BjGawodFnOy<br \/>\r\nhttps:\/\/arxiv.org\/abs\/2105.15013},<br \/>\r\nyear  = {2022},<br \/>\r\ndate = {2022-07-04},<br \/>\r\nurldate = {2021-01-01},<br \/>\r\nbooktitle = {Advances in Neural Information Processing Systems},<br \/>\r\njournal = {arXiv preprint arXiv:2105.15013},<br \/>\r\nnote = {Accepted at NeurIPS 2022 Conference},<br \/>\r\nkeywords = {},<br \/>\r\npubstate = {published},<br \/>\r\ntppubtype = {inproceedings}<br \/>\r\n}<br \/>\r\n<\/pre><\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('13','tp_bibtex')\">Close<\/a><\/p><\/div><div class=\"tp_links\" id=\"tp_links_13\" style=\"display:none;\"><div class=\"tp_links_entry\"><ul class=\"tp_pub_list\"><li><i class=\"fas fa-globe\"><\/i><a class=\"tp_pub_list\" href=\"https:\/\/openreview.net\/forum?id=BjGawodFnOy\" title=\"https:\/\/openreview.net\/forum?id=BjGawodFnOy\" target=\"_blank\">https:\/\/openreview.net\/forum?id=BjGawodFnOy<\/a><\/li><li><i class=\"ai ai-arxiv\"><\/i><a class=\"tp_pub_list\" href=\"https:\/\/arxiv.org\/abs\/2105.15013\" 
title=\"https:\/\/arxiv.org\/abs\/2105.15013\" target=\"_blank\">https:\/\/arxiv.org\/abs\/2105.15013<\/a><\/li><\/ul><\/div><p class=\"tp_close_menu\"><a class=\"tp_close\" onclick=\"teachpress_pub_showhide('13','tp_links')\">Close<\/a><\/p><\/div><\/div><\/div><\/div><\/div><\/div>\n\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/section>\n\t\t\t\t<\/div>\n\t\t","protected":false},"excerpt":{"rendered":"<p>Neurobotics Lab, Department of Computer Science, University of Freiburg<\/p>\n","protected":false},"author":2,"featured_media":820,"comment_status":"open","ping_status":"open","sticky":false,"template":"elementor_header_footer","format":"standard","meta":{"footnotes":""},"categories":[9,8],"tags":[],"class_list":["post-819","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-esr","category-people"],"_links":{"self":[{"href":"https:\/\/elo-x.eu\/index.php?rest_route=\/wp\/v2\/posts\/819","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/elo-x.eu\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/elo-x.eu\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/elo-x.eu\/index.php?rest_route=\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"https:\/\/elo-x.eu\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=819"}],"version-history":[{"count":26,"href":"https:\/\/elo-x.eu\/index.php?rest_route=\/wp\/v2\/posts\/819\/revisions"}],"predecessor-version":[{"id":2599,"href":"https:\/\/elo-x.eu\/index.php?rest_route=\/wp\/v2\/posts\/819\/revisions\/2599"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/elo-x.eu\/index.php?rest_route=\/wp\/v2\/media\/820"}],"wp:attachment":[{"href":"https:\/\/elo-x.eu\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=819"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/elo-x.eu\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=819"},{"tax
onomy":"post_tag","embeddable":true,"href":"https:\/\/elo-x.eu\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=819"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}