This is not a C question, but I am posting it here in case anyone familiar with the topic can help.
I am writing a Python program that classifies an input dataset into 0 and 1 and outputs the result together with the input dataset.
However, when I run it from the terminal I get the error shown at the end of this post, and I do not know how to resolve it.
I also do not know how to write the numerical values of the input data into the CSV file together with the result.
Any advice would be greatly appreciated. Thank you in advance.
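For the second point, what I am trying to achieve is roughly the hypothetical helper sketched below (a sketch only, not part of the posted code). Here data, target, preds and accuracy stand for the values computed inside the test() function further down, assuming the whole test set arrives in a single batch as in that code, and pd / csv are the modules the script already imports. Note that self.data holds the values after data_transform, i.e. the standardized inputs rather than the raw CSV values.

# Hypothetical helper that test() could call once preds and accuracy are computed
def write_results(data, target, preds, accuracy, csv_path):
    inputs_df = pd.DataFrame(data.cpu().numpy())                # one column per (standardized) input feature
    labels_df = pd.DataFrame({'label': target.cpu().numpy()})
    preds_df = pd.DataFrame({'predict': preds.cpu().numpy()})
    out = pd.concat([labels_df, preds_df, inputs_df], axis=1)   # label, prediction, then the input values
    with open(csv_path, 'w', newline='') as file:
        csv.writer(file).writerow(['accuracy', accuracy])       # accuracy on the first line
    out.to_csv(csv_path, mode='a', header=True, index=False)    # append the table below it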
Relevant code (classification.py):
import time
import torch
import pandas as pd
import numpy as np
import csv
# Data size (I rows x J columns)
DATA_SIZE_I = 11
DATA_SIZE_J = 11
# File paths
TRAIN_CSV_PATH = r"C:\Users\Tokyo\.vscode\su\train_data_noise.csv"
VALIDATION_CSV_PATH = r"C:\Users\Tokyo\.vscode\su\validation_data_noise.csv"
TEST_CSV_PATH = r"C:\Users\Tokyo\.vscode\su\test_data_noise.csv"
OUTPUT_CSV_PATH = 'result_noise.csv'
LOSS_CSV_PATH = '.result_losses.csv'
# Hyperparameters
EPOCHS = 10
BATCH = 100
lr = 1.0e-4
NODE_NUM = DATA_SIZE_I*DATA_SIZE_J
# Dataset: the first CSV column is the label (0/1), the remaining columns are the features
class BGS(torch.utils.data.Dataset):
    def __init__(self, csv_path):
        df = pd.read_csv(csv_path, header=0, dtype='float')
        data = df.iloc[:, 1:].values
        data = data_transform(data)
        self.data = torch.tensor(data, dtype=torch.float32)
        labels = df.iloc[:, 0].values
        self.labels = torch.tensor(labels, dtype=torch.long)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        return self.data[index], self.labels[index]
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = torch.nn.Linear(DATA_SIZE_I*DATA_SIZE_J, NODE_NUM*5)
        self.fc2 = torch.nn.Linear(NODE_NUM*5, NODE_NUM)
        self.fc3 = torch.nn.Linear(NODE_NUM, 2)

    def forward(self, x):
        x = self.fc1(x)
        x = torch.nn.functional.relu(x)
        x = self.fc2(x)
        x = torch.nn.functional.relu(x)
        x = self.fc3(x)
        x = torch.nn.functional.softmax(x, dim=1)
        return x
# Standardize each row: subtract the row mean and divide by the sample standard deviation
def data_transform(x):
    return (x - np.mean(x, axis=1, keepdims=True)) / np.std(x, axis=1, ddof=1, keepdims=True)
# Training function (one epoch)
def train(train_loader, model, optimizer, criterion, device, num_epochs, epoch, losses):
    model.train()  # switch the model to training mode
    train_loss = 0
    train_acc = 0
    i = 0
    for i, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()  # reset the gradients
        output = model(data)  # forward pass
        loss = criterion(output, target)  # compute the loss
        train_loss += float(loss.item())
        preds = torch.max(output, 1)[1]
        train_acc += torch.sum(preds == target).item() / len(target)
        loss.backward()  # backpropagate the loss
        optimizer.step()  # update the weights
    losses.append(train_loss)
    print('#train_epoch: [{}/{}], Loss: {}, acc: {}'.format(epoch + 1, num_epochs, train_loss / (i+1), train_acc / (i+1)), end=' ')
# Validation function (one epoch)
def validation(validation_loader, model, criterion, device, num_epochs, epoch, losses):
    model.eval()  # switch the model to evaluation mode
    val_loss = 0
    val_acc = 0
    i = 0
    with torch.no_grad():
        for i, (data, target) in enumerate(validation_loader):
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss = criterion(output, target)  # compute the loss
            val_loss += float(loss.item())
            preds = torch.max(output, 1)[1]
            val_acc += torch.sum(preds == target).item() / len(target)
    losses.append(val_loss)
    print('#val_epoch: [{}/{}], Loss: {}, acc: {}'.format(epoch + 1, num_epochs, val_loss / (i+1), val_acc / (i+1)))
# Test: run the model on the whole test set and write accuracy, labels and predictions to CSV
def test(test_loader, model, device):
    model.eval()  # switch the model to evaluation mode
    with torch.no_grad():
        for i, (data, target) in enumerate(test_loader):
            data, target = data.to(device), target.to(device)
            output = model(data)
            preds = torch.max(output, 1)[1]
            accuracy = torch.sum(preds == target).item() / len(target)
            target, preds = target.cpu().numpy(), preds.cpu().numpy()
            target, preds = pd.DataFrame(target), pd.DataFrame(preds)
            df = pd.concat([target, preds], axis=1)
    with open(OUTPUT_CSV_PATH, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['accuracy', '{}'.format(accuracy)])
        writer.writerow(['labels', 'predict'])
    df.to_csv(OUTPUT_CSV_PATH, mode='a', header=False, index=False)
def main():
    # 1. GPU setup (in PyTorch the device has to be selected explicitly)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(device)

    # 3. Load the datasets
    train_data = BGS(TRAIN_CSV_PATH)
    validation_data = BGS(VALIDATION_CSV_PATH)
    test_data = BGS(TEST_CSV_PATH)

    # 4. Create the DataLoaders
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=BATCH, shuffle=True, num_workers=2)
    validation_loader = torch.utils.data.DataLoader(validation_data, batch_size=BATCH, shuffle=False)
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=test_data.__len__(), shuffle=False)

    # 5. Build the model
    torch.manual_seed(0)
    model = Net().to(device)

    # 6. Define the loss function
    criterion = torch.nn.CrossEntropyLoss()

    # 7. Define the optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # 8. Prepare the output records
    train_losses = []
    # train_accuracy = []
    validation_losses = []
    # validation_accuracy = []

    # 9. Training (evaluate on the test data at the end of every epoch)
    start = time.time()
    for epoch in range(EPOCHS):
        train(train_loader, model, optimizer, criterion, device, EPOCHS, epoch, train_losses)
        validation(validation_loader, model, criterion, device, EPOCHS, epoch, validation_losses)
        test(test_loader, model, device)
    process_time = time.time() - start
    print("process_time:{0}".format(process_time) + "[sec]")
    # train_losses, validation_losses = pd.DataFrame(train_losses), pd.DataFrame(validation_losses)
    # df_losses = pd.concat([train_losses/1000, validation_losses/100], axis=1)
    # df_losses.to_csv(LOSS_CSV_PATH, mode='a', header=False, index=False)


if __name__ == '__main__':
    main()
Error output when running the script in the terminal:
PS C:\Users\Tokyo\PycharmProjects\SuNN> python classification.py
cuda
Traceback (most recent call last):
File "pandas\_libs\parsers.pyx", line 1050, in pandas._libs.parsers.TextReader._convert_tokens
TypeError: Cannot cast array data from dtype('O') to dtype('float64') according to the rule 'safe'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "classification.py", line 163, in <module>
main()
File "classification.py", line 124, in main
train_data = BGS(TRAIN_CSV_PATH)
File "classification.py", line 28, in __init__
df = pd.read_csv(csv_path, header=0, dtype='float')
File "C:\Users\Tokyo\anaconda3\envs\pytorch19\lib\site-packages\pandas\io\parsers.py", line 610, in read_csv
return _read(filepath_or_buffer, kwds)
File "C:\Users\Tokyo\anaconda3\envs\pytorch19\lib\site-packages\pandas\io\parsers.py", line 468, in _read
return parser.read(nrows)
File "C:\Users\Tokyo\anaconda3\envs\pytorch19\lib\site-packages\pandas\io\parsers.py", line 1057, in read
index, columns, col_dict = self._engine.read(nrows)
File "C:\Users\Tokyo\anaconda3\envs\pytorch19\lib\site-packages\pandas\io\parsers.py", line 2061, in read
data = self._reader.read(nrows)
File "pandas\_libs\parsers.pyx", line 756, in pandas._libs.parsers.TextReader.read
File "pandas\_libs\parsers.pyx", line 771, in pandas._libs.parsers.TextReader._read_low_memory
File "pandas\_libs\parsers.pyx", line 850, in pandas._libs.parsers.TextReader._read_rows
File "pandas\_libs\parsers.pyx", line 982, in pandas._libs.parsers.TextReader._convert_column_data
File "pandas\_libs\parsers.pyx", line 1056, in pandas._libs.parsers.TextReader._convert_tokens
ValueError: could not convert string to float: ' 0.5185022.336320e-001'
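Looking at the last ValueError, the text ' 0.5185022.336320e-001' seems to be two numbers run together in a single cell of train_data_noise.csv, which would explain why dtype='float' fails during parsing. A small sketch I could use to locate such cells (assuming the same file path and header row as in classification.py):

import pandas as pd

df = pd.read_csv(r"C:\Users\Tokyo\.vscode\su\train_data_noise.csv", header=0)  # no dtype, so parsing succeeds
numeric = df.apply(pd.to_numeric, errors='coerce')  # cells that are not valid numbers become NaN
bad = numeric.isna() & df.notna()                   # NaN after coercion but non-empty in the raw file
print(df[bad.any(axis=1)])                          # rows that contain the malformed values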