diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..13566b8
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.idea/DouOne.iml b/.idea/DouOne.iml
new file mode 100644
index 0000000..8b8c395
--- /dev/null
+++ b/.idea/DouOne.iml
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..a739919
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/douzero/dmc/models.py b/douzero/dmc/models.py
index 4343ddf..53e017c 100644
--- a/douzero/dmc/models.py
+++ b/douzero/dmc/models.py
@@ -80,9 +80,9 @@ def forward(self, z, x, return_value=False, flags=None):
# Model dict is only used in evaluation but not training
model_dict = {}
-model_dict['landlord'] = LandlordLstmModel
-model_dict['landlord_up'] = FarmerLstmModel
-model_dict['landlord_down'] = FarmerLstmModel
+model_dict['landlord'] = GeneralModel
+model_dict['landlord_up'] = GeneralModel
+model_dict['landlord_down'] = GeneralModel
class Model:
"""
@@ -93,9 +93,9 @@ def __init__(self, device=0):
self.models = {}
if not device == "cpu":
device = 'cuda:' + str(device)
- self.models['landlord'] = LandlordLstmModel().to(torch.device(device))
- self.models['landlord_up'] = FarmerLstmModel().to(torch.device(device))
- self.models['landlord_down'] = FarmerLstmModel().to(torch.device(device))
+ self.models['landlord'] = GeneralModel().to(torch.device(device))
+ self.models['landlord_up'] = GeneralModel().to(torch.device(device))
+ self.models['landlord_down'] = GeneralModel().to(torch.device(device))
def forward(self, position, z, x, training=False, flags=None):
model = self.models[position]
@@ -119,3 +119,85 @@ def get_model(self, position):
def get_models(self):
return self.models
+
+# Added from https://github.com/Vincentzyx/Douzero_Resnet/blob/main/douzero/dmc/models.py
+
+# Residual block in the style of ResNet-18/34 (two kernel-size-3 convolutions), built from 1-D layers.
+class BasicBlock(nn.Module):
+    """Two-convolution 1-D residual block.
+
+    Main path: Conv1d(k=3) -> BatchNorm1d -> ReLU -> Conv1d(k=3) -> BatchNorm1d.
+    The result is added to a shortcut of the input and passed through ReLU.
+
+    Args:
+        in_planes: Number of input channels.
+        planes: Number of channels produced by both convolutions.
+        stride: Stride of the first convolution and, when a projection is
+            needed, of the shortcut convolution.
+    """
+
+    # Output channel count is ``expansion * planes``.
+    expansion = 1
+
+    def __init__(self, in_planes, planes, stride=1):
+        super().__init__()
+        self.conv1 = nn.Conv1d(in_planes, planes, kernel_size=(3,),
+                               stride=(stride,), padding=1, bias=False)
+        self.bn1 = nn.BatchNorm1d(planes)
+        self.conv2 = nn.Conv1d(planes, planes, kernel_size=(3,),
+                               stride=(1,), padding=1, bias=False)
+        self.bn2 = nn.BatchNorm1d(planes)
+        # Identity shortcut by default (an empty Sequential returns its input).
+        self.shortcut = nn.Sequential()
+        # The shortcut must match the main path in both length and channel
+        # count; when it would not, project it with a 1x1 convolution + BN.
+        if stride != 1 or in_planes != self.expansion * planes:
+            self.shortcut = nn.Sequential(
+                nn.Conv1d(in_planes, self.expansion * planes,
+                          kernel_size=(1,), stride=(stride,), bias=False),
+                nn.BatchNorm1d(self.expansion * planes)
+            )
+
+    def forward(self, x):
+        """Apply the residual block to ``x`` and return the activated sum.
+
+        ``x`` is fed to Conv1d, so its channel dimension must equal
+        ``in_planes``.
+        """
+        # torch.relu is used instead of the ``F`` alias: no import of
+        # torch.nn.functional as ``F`` is visible in this change, whereas
+        # ``torch`` is already used elsewhere in this module.
+        out = torch.relu(self.bn1(self.conv1(x)))
+        out = self.bn2(self.conv2(out))
+        out += self.shortcut(x)
+        return torch.relu(out)
+
+
+class GeneralModel(nn.Module):
+    """1-D ResNet value network; the same architecture serves every position.
+
+    A strided convolutional stem and three residual stages encode ``z``; the
+    flattened encoding is concatenated with four copies of ``x`` and reduced
+    to one value per row by a four-layer MLP.
+    """
+
+    def __init__(self):
+        super().__init__()
+        # Running input-channel count, consumed and updated by _make_layer.
+        self.in_planes = 80
+        # Stem: 40 input channels -> 80 channels, stride 2.
+        # Assumes z has length 54 (-> 27 after the stem) - TODO confirm
+        # against the feature encoder.
+        self.conv1 = nn.Conv1d(40, 80, kernel_size=(3,),
+                               stride=(2,), padding=1, bias=False)
+
+        self.bn1 = nn.BatchNorm1d(80)
+
+        # Each stage halves the length (rounding up): 27 -> 14 -> 7 -> 4.
+        self.layer1 = self._make_layer(BasicBlock, 80, 2, stride=2)
+        self.layer2 = self._make_layer(BasicBlock, 160, 2, stride=2)
+        self.layer3 = self._make_layer(BasicBlock, 320, 2, stride=2)
+        # Input width = 320 channels * 4 positions + four copies of a
+        # 15-feature x vector.
+        self.linear1 = nn.Linear(320 * BasicBlock.expansion * 4 + 15 * 4, 1024)
+        self.linear2 = nn.Linear(1024, 512)
+        self.linear3 = nn.Linear(512, 256)
+        self.linear4 = nn.Linear(256, 1)
+
+    def _make_layer(self, block, planes, num_blocks, stride):
+        """Build a stage of ``num_blocks`` blocks and advance ``self.in_planes``.
+
+        Only the first block uses ``stride``; the remaining ones use stride 1.
+        """
+        layers = []
+        for block_stride in [stride] + [1] * (num_blocks - 1):
+            layers.append(block(self.in_planes, planes, block_stride))
+            self.in_planes = planes * block.expansion
+        return nn.Sequential(*layers)
+
+    def forward(self, z, x, return_value=False, flags=None, debug=False):
+        """Score every row of the batch and optionally pick an action.
+
+        Args:
+            z: Input to the convolutional stem; dim 1 must have 40 channels.
+            x: Per-row feature vector, concatenated four times with the
+                encoding (linear1 expects 15 features per copy).
+            return_value: When true, return the raw values only.
+            flags: Optional object with an ``exp_epsilon`` attribute giving
+                the probability of choosing a random row.
+            debug: Accepted for call compatibility; not used here.
+
+        Returns:
+            ``dict(values=out)`` when ``return_value`` is true, otherwise
+            ``dict(action=..., max_value=...)`` where ``action`` is a row
+            index along dim 0.
+        """
+        # torch.relu / torch.nn.functional are referenced through ``torch``
+        # because no import of the ``F`` alias is visible in this change.
+        leaky_relu_ = torch.nn.functional.leaky_relu_
+        out = torch.relu(self.bn1(self.conv1(z)))
+        out = self.layer1(out)
+        out = self.layer2(out)
+        out = self.layer3(out)
+        out = out.flatten(1, 2)
+        out = torch.cat([x, x, x, x, out], dim=-1)
+        # NOTE(review): the final 1-unit layer is also passed through leaky
+        # ReLU, which scales negative values; kept as-is so existing
+        # behaviour and checkpoints are unaffected.
+        for linear in (self.linear1, self.linear2, self.linear3, self.linear4):
+            out = leaky_relu_(linear(out))
+        if return_value:
+            return dict(values=out)
+        explore = (flags is not None and flags.exp_epsilon > 0
+                   and np.random.rand() < flags.exp_epsilon)
+        if explore:
+            action = torch.randint(out.shape[0], (1,))[0]
+        else:
+            action = torch.argmax(out, dim=0)[0]
+        return dict(action=action, max_value=torch.max(out))
+
+
+