<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<id>http://quantsoftware.gatech.edu/index.php?action=history&amp;feed=atom&amp;title=MC3-Project-2</id>
	<title>MC3-Project-2 - Revision history</title>
	<link rel="self" type="application/atom+xml" href="http://quantsoftware.gatech.edu/index.php?action=history&amp;feed=atom&amp;title=MC3-Project-2"/>
	<link rel="alternate" type="text/html" href="http://quantsoftware.gatech.edu/index.php?title=MC3-Project-2&amp;action=history"/>
	<updated>2026-04-14T23:10:49Z</updated>
	<subtitle>Revision history for this page on the wiki</subtitle>
	<generator>MediaWiki 1.34.4</generator>
	<entry>
		<id>http://quantsoftware.gatech.edu/index.php?title=MC3-Project-2&amp;diff=2006&amp;oldid=prev</id>
		<title>Dave: /* DRAFT */</title>
		<link rel="alternate" type="text/html" href="http://quantsoftware.gatech.edu/index.php?title=MC3-Project-2&amp;diff=2006&amp;oldid=prev"/>
		<updated>2017-07-03T19:37:25Z</updated>

		<summary type="html">&lt;p&gt;&lt;span dir=&quot;auto&quot;&gt;&lt;span class=&quot;autocomment&quot;&gt;DRAFT&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;Revision as of 19:37, 3 July 2017&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l1&quot; &gt;Line 1:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 1:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;==DRAFT==&lt;/del&gt;&lt;/div&gt;&lt;/td&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;&lt;/del&gt;&lt;/div&gt;&lt;/td&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;This assignment is under revision.  This notice will be removed once it is final.&lt;/del&gt;&lt;/div&gt;&lt;/td&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;&lt;/del&gt;&lt;/div&gt;&lt;/td&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;==Updates / FAQs==&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;==Updates / FAQs==&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Dave</name></author>
		
	</entry>
	<entry>
		<id>http://quantsoftware.gatech.edu/index.php?title=MC3-Project-2&amp;diff=2004&amp;oldid=prev</id>
		<title>Bhrolenok3: /* Rubric */ Note about negative reward in rubric</title>
		<link rel="alternate" type="text/html" href="http://quantsoftware.gatech.edu/index.php?title=MC3-Project-2&amp;diff=2004&amp;oldid=prev"/>
		<updated>2017-07-02T18:16:47Z</updated>

		<summary type="html">&lt;p&gt;&lt;span dir=&quot;auto&quot;&gt;&lt;span class=&quot;autocomment&quot;&gt;Rubric: &lt;/span&gt; Note about negative reward in rubric&lt;/span&gt;&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;Revision as of 18:16, 2 July 2017&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l196&quot; &gt;Line 196:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 196:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* Benchmark: As a benchmark to compare your solution to, we will run our reference solution in the same world, with 500 epochs.  We will take the median reward of our reference across all of those 500 epochs.&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* Benchmark: As a benchmark to compare your solution to, we will run our reference solution in the same world, with 500 epochs.  We will take the median reward of our reference across all of those 500 epochs.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* Your score: For each world we will take the median cost your solution finds across all 500 epochs.&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* Your score: For each world we will take the median cost your solution finds across all 500 epochs.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* For a test to be successful, your learner should find a total reward &amp;gt;= 1.5 x the benchmark.&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* For a test to be successful, your learner should find a total reward &amp;gt;= 1.5 x the benchmark&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;. Note that since reward for a single epoch is negative, your solution can be up to 50% worse than the reference solution and still pass&lt;/ins&gt;.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* There are 10 test cases, each test case is worth 9.5 points.&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* There are 10 test cases, each test case is worth 9.5 points.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* Here is how we will initialize your QLearner for these test cases:&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* Here is how we will initialize your QLearner for these test cases:&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l211&quot; &gt;Line 211:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 211:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;/PRE&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;/PRE&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* For Dyna-Q, we will set dyna = 200.  We will test your learner against &amp;lt;tt&amp;gt;world01.csv&amp;lt;/tt&amp;gt; and &amp;lt;tt&amp;gt;world02.csv&amp;lt;/tt&amp;gt; with 50 epochs.  Scoring is similar to the non-dyna case: Each test should complete in less than 10 seconds. For the test to be successful, your learner should find solution with total reward to the goal &amp;gt;= 1.5 x the  median reward our reference solution across all 50 epochs.  We will check this by taking the median of all 50 runs. Each test case is worth 2.5 points.  We will initialize your learner with the following parameter values for these test cases:&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* For Dyna-Q, we will set dyna = 200.  We will test your learner against &amp;lt;tt&amp;gt;world01.csv&amp;lt;/tt&amp;gt; and &amp;lt;tt&amp;gt;world02.csv&amp;lt;/tt&amp;gt; with 50 epochs.  Scoring is similar to the non-dyna case: Each test should complete in less than 10 seconds. For the test to be successful, your learner should find solution with total reward to the goal &amp;gt;= 1.5 x the  median reward our reference solution across all 50 epochs&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;.  Note that since reward for a single epoch is negative, your solution can be up to 50% worse than the reference solution and still pass&lt;/ins&gt;.  We will check this by taking the median of all 50 runs. Each test case is worth 2.5 points.  We will initialize your learner with the following parameter values for these test cases:&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;Pre&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;Pre&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Bhrolenok3</name></author>
		
	</entry>
	<entry>
		<id>http://quantsoftware.gatech.edu/index.php?title=MC3-Project-2&amp;diff=2003&amp;oldid=prev</id>
		<title>Bhrolenok3: /* Rubric */ iterations-&gt;epochs, clarified language, noted epoch step limit</title>
		<link rel="alternate" type="text/html" href="http://quantsoftware.gatech.edu/index.php?title=MC3-Project-2&amp;diff=2003&amp;oldid=prev"/>
		<updated>2017-07-02T18:06:14Z</updated>

		<summary type="html">&lt;p&gt;&lt;span dir=&quot;auto&quot;&gt;&lt;span class=&quot;autocomment&quot;&gt;Rubric: &lt;/span&gt; iterations-&amp;gt;epochs, clarified language, noted epoch step limit&lt;/span&gt;&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;Revision as of 18:06, 2 July 2017&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l193&quot; &gt;Line 193:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 193:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Only your QLearner class will be tested.   &lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Only your QLearner class will be tested.   &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* For basic Q-Learning (dyna = 0) we will test your learner against 10 test worlds with 500 &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;iterations &lt;/del&gt;in each world.  One &amp;quot;&lt;del class=&quot;diffchange diffchange-inline&quot;&gt;iteration&lt;/del&gt;&amp;quot; means your robot reaches the goal one time.  Your QLearner retains its state (Q-table), and then we allow it to navigate to the goal again, over and over, 500 times.  Each test (500 &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;iterations&lt;/del&gt;) should complete in less than 2 seconds.   &lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* For basic Q-Learning (dyna = 0) we will test your learner against 10 test worlds with 500 &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;epochs &lt;/ins&gt;in each world.  One &amp;quot;&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;epoch&lt;/ins&gt;&amp;quot; means your robot reaches the goal one time&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;, or after 100000 steps, whichever comes first&lt;/ins&gt;.  Your QLearner retains its state (Q-table), and then we allow it to navigate to the goal again, over and over, 500 times.  Each test (500 &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;epochs&lt;/ins&gt;) should complete in less than 2 seconds.  &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;&amp;lt;b&amp;gt;NOTE&amp;lt;/b&amp;gt;: an epoch where the robot fails to reach the goal will likely take &amp;lt;b&amp;gt;much&amp;lt;/b&amp;gt; longer (in running time), than one that does reach the goal, and is a common reason for failing to complete test cases within the time limit.&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* Benchmark: As a benchmark to compare your solution to, we will run our reference solution in the same world, with 500 &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;iterations&lt;/del&gt;.  We will take the median reward of our reference across all of those 500 &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;iterations&lt;/del&gt;.&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* Benchmark: As a benchmark to compare your solution to, we will run our reference solution in the same world, with 500 &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;epochs&lt;/ins&gt;.  We will take the median reward of our reference across all of those 500 &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;epochs&lt;/ins&gt;.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* Your score: For each world we will take the median cost your solution finds across all 500 &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;iterations&lt;/del&gt;.&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* Your score: For each world we will take the median cost your solution finds across all 500 &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;epochs&lt;/ins&gt;.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* For a test to be successful, your learner should find a total reward &amp;gt;= 1.5 x the benchmark.&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* For a test to be successful, your learner should find a total reward &amp;gt;= 1.5 x the benchmark.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* There are 10 test cases, each test case is worth 9.5 points.&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* There are 10 test cases, each test case is worth 9.5 points.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;* Here is how we will initialize your QLearner for these test cases:&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;Pre&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;Pre&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l210&quot; &gt;Line 210:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 211:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;/PRE&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;/PRE&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* For Dyna-Q, we will set dyna = 200.  We will test your learner against &amp;lt;tt&amp;gt;world01.csv&amp;lt;/tt&amp;gt; and &amp;lt;tt&amp;gt;world02.csv&amp;lt;/tt&amp;gt; with 50 &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;iterations&lt;/del&gt;.  Scoring is similar to the non-dyna case: Each test should complete in less than 10 seconds. For the test to be successful, your learner should find solution with total reward to the goal &amp;gt;= 1.5 x the  median reward our reference solution across all &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;iterations&lt;/del&gt;.  We will check this by taking the median of all 50 runs. Each test case is worth 2.5 points.  We will initialize your learner with the following parameter values:&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* For Dyna-Q, we will set dyna = 200.  We will test your learner against &amp;lt;tt&amp;gt;world01.csv&amp;lt;/tt&amp;gt; and &amp;lt;tt&amp;gt;world02.csv&amp;lt;/tt&amp;gt; with 50 &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;epochs&lt;/ins&gt;.  Scoring is similar to the non-dyna case: Each test should complete in less than 10 seconds. For the test to be successful, your learner should find solution with total reward to the goal &amp;gt;= 1.5 x the  median reward our reference solution across all &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;50 epochs&lt;/ins&gt;.  We will check this by taking the median of all 50 runs. Each test case is worth 2.5 points.  We will initialize your learner with the following parameter values &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;for these test cases&lt;/ins&gt;:&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;Pre&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;Pre&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Bhrolenok3</name></author>
		
	</entry>
	<entry>
		<id>http://quantsoftware.gatech.edu/index.php?title=MC3-Project-2&amp;diff=2002&amp;oldid=prev</id>
		<title>Bhrolenok3: /* Part 3: Implement Dyna (5%) */</title>
		<link rel="alternate" type="text/html" href="http://quantsoftware.gatech.edu/index.php?title=MC3-Project-2&amp;diff=2002&amp;oldid=prev"/>
		<updated>2017-07-02T17:57:12Z</updated>

		<summary type="html">&lt;p&gt;&lt;span dir=&quot;auto&quot;&gt;&lt;span class=&quot;autocomment&quot;&gt;Part 3: Implement Dyna (5%)&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;Revision as of 17:57, 2 July 2017&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l143&quot; &gt;Line 143:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 143:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;==Part 3: Implement Dyna (5%)==&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;==Part 3: Implement Dyna (5%)==&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Add additional components to your QLearner class so that multiple &amp;quot;hallucinated&amp;quot; experience tuples are used to update the Q-table for each &amp;quot;real&amp;quot; experience.  The addition of this component should speed convergence in terms of the number of calls to query().&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Add additional components to your QLearner class so that multiple &amp;quot;hallucinated&amp;quot; experience tuples are used to update the Q-table for each &amp;quot;real&amp;quot; experience.  The addition of this component should speed convergence in terms of the number of calls to query()&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;, &amp;lt;b&amp;gt;not necessarily running time&amp;lt;/b&amp;gt;&lt;/ins&gt;.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Note that it is not important that you implement Dyna exactly as described in the lecture.  The key requirement is that your code should somehow hallucinate additional experiences.  The precise method you use for discovering those experiences is flexible.  We will test your code on several test worlds with 50 &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;iterations &lt;/del&gt;and with dyna = 200.  Our expectation is that with Dyna, the solution should be much better after 50 &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;iterations &lt;/del&gt;than without.&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Note that it is not important that you implement Dyna exactly as described in the lecture.  The key requirement is that your code should somehow hallucinate additional experiences.  The precise method you use for discovering those experiences is flexible.  We will test your code on several test worlds with 50 &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;epochs &lt;/ins&gt;and with dyna = 200.  Our expectation is that with Dyna, the solution should be much better after 50 &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;epochs &lt;/ins&gt;than without.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;==Part 4: Implement author() Method (0%)==&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;==Part 4: Implement author() Method (0%)==&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Bhrolenok3</name></author>
		
	</entry>
	<entry>
		<id>http://quantsoftware.gatech.edu/index.php?title=MC3-Project-2&amp;diff=2001&amp;oldid=prev</id>
		<title>Bhrolenok3: /* Part 2: Navigation Problem Test Cases */ iterations-&gt;epochs</title>
		<link rel="alternate" type="text/html" href="http://quantsoftware.gatech.edu/index.php?title=MC3-Project-2&amp;diff=2001&amp;oldid=prev"/>
		<updated>2017-07-02T17:55:22Z</updated>

		<summary type="html">&lt;p&gt;&lt;span dir=&quot;auto&quot;&gt;&lt;span class=&quot;autocomment&quot;&gt;Part 2: Navigation Problem Test Cases: &lt;/span&gt; iterations-&amp;gt;epochs&lt;/span&gt;&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;Revision as of 17:55, 2 July 2017&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l106&quot; &gt;Line 106:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 106:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* 1 if the robot moves to the goal space&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* 1 if the robot moves to the goal space&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Overall, we will assess the performance of a policy as the &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;average &lt;/del&gt;reward it incurs to travel from the start to the goal (higher reward is better).  We assess a learner in terms of the reward it converges to over a given number of training &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;iterations &lt;/del&gt;(trips from start to goal).  &amp;#039;&amp;#039;&amp;#039;Important note:&amp;#039;&amp;#039;&amp;#039; the problem includes random actions.  So, for example, if your learner responds with a &amp;quot;move north&amp;quot; action, there is some probability that the robot will actually move in a different direction.  For this reason, the &amp;quot;wise&amp;quot; learner develops policies that keep the robot well away from quicksand.  We map this problem to a reinforcement learning problem as follows:&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Overall, we will assess the performance of a policy as the &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;median &lt;/ins&gt;reward it incurs to travel from the start to the goal (higher reward is better).  We assess a learner in terms of the reward it converges to over a given number of training &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;epochs &lt;/ins&gt;(trips from start to goal).  &amp;#039;&amp;#039;&amp;#039;Important note:&amp;#039;&amp;#039;&amp;#039; the problem includes random actions.  So, for example, if your learner responds with a &amp;quot;move north&amp;quot; action, there is some probability that the robot will actually move in a different direction.  For this reason, the &amp;quot;wise&amp;quot; learner develops policies that keep the robot well away from quicksand.  We map this problem to a reinforcement learning problem as follows:&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* State: The state is the location of the robot, it is computed (discretized) as: column location * 10 + row location.&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* State: The state is the location of the robot, it is computed (discretized) as: column location * 10 + row location.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l113&quot; &gt;Line 113:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 113:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* T: The transition matrix can be inferred from the CSV map and the actions.&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* T: The transition matrix can be inferred from the CSV map and the actions.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Note that R and T are not known by or available to the learner.  The &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;testing &lt;/del&gt;code &amp;lt;tt&amp;gt;testqlearner.py&amp;lt;/tt&amp;gt; will test your code as follows (pseudo code):&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Note that R and T are not known by or available to the learner.  The code &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;in &lt;/ins&gt;&amp;lt;tt&amp;gt;testqlearner.py&amp;lt;/tt&amp;gt; will test your code as follows (pseudo code):&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;pre&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;pre&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Bhrolenok3</name></author>
		
	</entry>
	<entry>
		<id>http://quantsoftware.gatech.edu/index.php?title=MC3-Project-2&amp;diff=2000&amp;oldid=prev</id>
		<title>Bhrolenok3: /* Template and Data */ updated paths for Summer 2017</title>
		<link rel="alternate" type="text/html" href="http://quantsoftware.gatech.edu/index.php?title=MC3-Project-2&amp;diff=2000&amp;oldid=prev"/>
		<updated>2017-07-02T17:50:53Z</updated>

		<summary type="html">&lt;p&gt;&lt;span dir=&quot;auto&quot;&gt;&lt;span class=&quot;autocomment&quot;&gt;Template and Data: &lt;/span&gt; updated paths for Summer 2017&lt;/span&gt;&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;Revision as of 17:50, 2 July 2017&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l24&quot; &gt;Line 24:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 24:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;==Template and Data==&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;==Template and Data==&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* Update your local &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;mc3_p2 &lt;/del&gt;directory using github.  &lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* Update your local &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;mc3p2_qlearning_robot &lt;/ins&gt;directory using github.  &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* Implement the &amp;lt;tt&amp;gt;QLearner&amp;lt;/tt&amp;gt; class in &amp;lt;tt&amp;gt;&lt;del class=&quot;diffchange diffchange-inline&quot;&gt;mc3_p2&lt;/del&gt;/QLearner.py&amp;lt;/tt&amp;gt;.&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* Implement the &amp;lt;tt&amp;gt;QLearner&amp;lt;/tt&amp;gt; class in &amp;lt;tt&amp;gt;&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;mc3p2_qlearning_robot&lt;/ins&gt;/QLearner.py&amp;lt;/tt&amp;gt;.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* To &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;test &lt;/del&gt;your Q-learner, run &amp;lt;tt&amp;gt;&amp;#039;&amp;#039;&amp;#039;python testqlearner.py&amp;#039;&amp;#039;&amp;#039;&amp;lt;/tt&amp;gt; from the &amp;lt;tt&amp;gt;&lt;del class=&quot;diffchange diffchange-inline&quot;&gt;mc3_p2&lt;/del&gt;/&amp;lt;/tt&amp;gt; directory.&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* To &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;debug &lt;/ins&gt;your Q-learner, run &amp;lt;tt&amp;gt;&amp;#039;&amp;#039;&amp;#039;python testqlearner.py&amp;#039;&amp;#039;&amp;#039;&amp;lt;/tt&amp;gt; from the &amp;lt;tt&amp;gt;&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;mc3p2_qlearning_robot&lt;/ins&gt;/&amp;lt;/tt&amp;gt; directory&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;. The grading script for this project is &amp;lt;tt&amp;gt;grade_robot_qlearning.py&amp;lt;/tt&amp;gt;&lt;/ins&gt;.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* Note that example navigation problems are provided in the &amp;lt;tt&amp;gt;&lt;del class=&quot;diffchange diffchange-inline&quot;&gt;mc3_p2&lt;/del&gt;/testworlds&amp;lt;/tt&amp;gt; directory&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* Note that example navigation problems are provided in the &amp;lt;tt&amp;gt;&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;mc3p2_qlearning_robot&lt;/ins&gt;/testworlds&amp;lt;/tt&amp;gt; directory&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;.&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;==Part 1: Implement Q-Learner (95%)==&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;==Part 1: Implement Q-Learner (95%)==&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Bhrolenok3</name></author>
		
	</entry>
	<entry>
		<id>http://quantsoftware.gatech.edu/index.php?title=MC3-Project-2&amp;diff=1982&amp;oldid=prev</id>
		<title>Tucker: /* Part 1: Implement Q-Learner (95%) */</title>
		<link rel="alternate" type="text/html" href="http://quantsoftware.gatech.edu/index.php?title=MC3-Project-2&amp;diff=1982&amp;oldid=prev"/>
		<updated>2017-06-13T17:28:51Z</updated>

		<summary type="html">&lt;p&gt;&lt;span dir=&quot;auto&quot;&gt;&lt;span class=&quot;autocomment&quot;&gt;Part 1: Implement Q-Learner (95%)&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;Revision as of 17:28, 13 June 2017&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l33&quot; &gt;Line 33:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 33:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Your QLearner class should be implemented in the file &amp;lt;tt&amp;gt;QLearner.py&amp;lt;/tt&amp;gt;.  It should implement EXACTLY the API defined below.  DO NOT import any modules besides those allowed below.  Your class should implement the following methods:&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Your QLearner class should be implemented in the file &amp;lt;tt&amp;gt;QLearner.py&amp;lt;/tt&amp;gt;.  It should implement EXACTLY the API defined below.  DO NOT import any modules besides those allowed below.  Your class should implement the following methods:&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;b&amp;gt;The constructor QLearner()&amp;lt;/b&amp;gt; should reserve space for keeping track of Q[s, a] for the number of states and actions.  It should initialize Q[] with &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;uniform random values between -1.0 and 1.0&lt;/del&gt;.  Details on the input arguments to the constructor:&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;b&amp;gt;The constructor QLearner()&amp;lt;/b&amp;gt; should reserve space for keeping track of Q[s, a] for the number of states and actions.  It should initialize Q[] with &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;all zeros&lt;/ins&gt;.  Details on the input arguments to the constructor:&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* &amp;lt;tt&amp;gt;num_states&amp;lt;/tt&amp;gt; integer, the number of states to consider&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* &amp;lt;tt&amp;gt;num_states&amp;lt;/tt&amp;gt; integer, the number of states to consider&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Tucker</name></author>
		
	</entry>
	<entry>
		<id>http://quantsoftware.gatech.edu/index.php?title=MC3-Project-2&amp;diff=1962&amp;oldid=prev</id>
		<title>Tucker: /* Updates / FAQs */</title>
		<link rel="alternate" type="text/html" href="http://quantsoftware.gatech.edu/index.php?title=MC3-Project-2&amp;diff=1962&amp;oldid=prev"/>
		<updated>2017-05-19T23:03:42Z</updated>

		<summary type="html">&lt;p&gt;&lt;span dir=&quot;auto&quot;&gt;&lt;span class=&quot;autocomment&quot;&gt;Updates / FAQs&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;Revision as of 23:03, 19 May 2017&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l1&quot; &gt;Line 1:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 1:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;==DRAFT==&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;This assignment is under revision.  This notice will be removed once it is final.&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;ins style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;&lt;/ins&gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;==Updates / FAQs==&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;==Updates / FAQs==&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Tucker</name></author>
		
	</entry>
	<entry>
		<id>http://quantsoftware.gatech.edu/index.php?title=MC3-Project-2&amp;diff=1882&amp;oldid=prev</id>
		<title>Tucker: /* Required, Allowed &amp; Prohibited */</title>
		<link rel="alternate" type="text/html" href="http://quantsoftware.gatech.edu/index.php?title=MC3-Project-2&amp;diff=1882&amp;oldid=prev"/>
		<updated>2017-04-02T15:06:03Z</updated>

		<summary type="html">&lt;p&gt;&lt;span dir=&quot;auto&quot;&gt;&lt;span class=&quot;autocomment&quot;&gt;Required, Allowed &amp;amp; Prohibited&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;Revision as of 15:06, 2 April 2017&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l233&quot; &gt;Line 233:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 233:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* You may reuse sections of code (up to 5 lines) that you collected from other students or the internet.&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* You may reuse sections of code (up to 5 lines) that you collected from other students or the internet.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* Code provided by the instructor, or allowed by the instructor to be shared.&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* Code provided by the instructor, or allowed by the instructor to be shared.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&lt;del style=&quot;font-weight: bold; text-decoration: none;&quot;&gt;* Use util.py (only) for reading data.&lt;/del&gt;&lt;/div&gt;&lt;/td&gt;&lt;td colspan=&quot;2&quot;&gt; &lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Prohibited:&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;Prohibited:&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Tucker</name></author>
		
	</entry>
	<entry>
		<id>http://quantsoftware.gatech.edu/index.php?title=MC3-Project-2&amp;diff=1878&amp;oldid=prev</id>
		<title>Tucker: /* Rubric */</title>
		<link rel="alternate" type="text/html" href="http://quantsoftware.gatech.edu/index.php?title=MC3-Project-2&amp;diff=1878&amp;oldid=prev"/>
		<updated>2017-03-30T16:57:07Z</updated>

		<summary type="html">&lt;p&gt;&lt;span dir=&quot;auto&quot;&gt;&lt;span class=&quot;autocomment&quot;&gt;Rubric&lt;/span&gt;&lt;/span&gt;&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;Revision as of 16:57, 30 March 2017&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l191&quot; &gt;Line 191:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 191:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* For basic Q-Learning (dyna = 0) we will test your learner against 10 test worlds with 500 iterations in each world.  One &amp;quot;iteration&amp;quot; means your robot reaches the goal one time.  Your QLearner retains its state (Q-table), and then we allow it to navigate to the goal again, over and over, 500 times.  Each test (500 iterations) should complete in less than 2 seconds.   &lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* For basic Q-Learning (dyna = 0) we will test your learner against 10 test worlds with 500 iterations in each world.  One &amp;quot;iteration&amp;quot; means your robot reaches the goal one time.  Your QLearner retains its state (Q-table), and then we allow it to navigate to the goal again, over and over, 500 times.  Each test (500 iterations) should complete in less than 2 seconds.   &lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* Benchmark: As a benchmark to compare your solution to, we will run our reference solution in the same world, with 500 iterations.  We will take the median reward of our reference across all of those 500 iterations.&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* Benchmark: As a benchmark to compare your solution to, we will run our reference solution in the same world, with 500 iterations.  We will take the median reward of our reference across all of those 500 iterations.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* Your score: For each world we will take the &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;min &lt;/del&gt;cost your solution finds across all 500 iterations.&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* Your score: For each world we will take the &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;median &lt;/ins&gt;cost your solution finds across all 500 iterations.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* For a test to be successful, your learner should find a total reward &amp;gt;= 1.5 x the benchmark.&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* For a test to be successful, your learner should find a total reward &amp;gt;= 1.5 x the benchmark.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* There are 10 test cases, each test case is worth 9.5 points.&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* There are 10 test cases, each test case is worth 9.5 points.&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l206&quot; &gt;Line 206:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 206:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;/PRE&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;/PRE&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* For Dyna-Q, we will set dyna = 200.  We will test your learner against &amp;lt;tt&amp;gt;world01.csv&amp;lt;/tt&amp;gt; and &amp;lt;tt&amp;gt;world02.csv&amp;lt;/tt&amp;gt; with 50 iterations.  Scoring is similar to the non-dyna case: Each test should complete in less than 10 seconds. For the test to be successful, your learner should find solution with total reward to the goal &amp;gt;= 1.5 x the  median reward our reference solution across all iterations.  We will check this by taking the &lt;del class=&quot;diffchange diffchange-inline&quot;&gt;min &lt;/del&gt;of all 50 runs. Each test case is worth 2.5 points.  We will initialize your learner with the following parameter values:&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;* For Dyna-Q, we will set dyna = 200.  We will test your learner against &amp;lt;tt&amp;gt;world01.csv&amp;lt;/tt&amp;gt; and &amp;lt;tt&amp;gt;world02.csv&amp;lt;/tt&amp;gt; with 50 iterations.  Scoring is similar to the non-dyna case: Each test should complete in less than 10 seconds. For the test to be successful, your learner should find solution with total reward to the goal &amp;gt;= 1.5 x the  median reward our reference solution across all iterations.  We will check this by taking the &lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;median &lt;/ins&gt;of all 50 runs. Each test case is worth 2.5 points.  We will initialize your learner with the following parameter values:&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;Pre&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;td class=&#039;diff-marker&#039;&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;&amp;lt;Pre&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Tucker</name></author>
		
	</entry>
</feed>