From 5da9f611e68017badbba28be124789002247c147 Mon Sep 17 00:00:00 2001
From: Paul Wang
Date: Sun, 4 May 2025 11:21:52 +0000
Subject: [PATCH 01/10] Try out a commit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 HI | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 HI

diff --git a/HI b/HI
new file mode 100644
index 0000000..ae8d4cf
--- /dev/null
+++ b/HI
@@ -0,0 +1 @@
+ccccsdfaksdjfadjsflksjdf
\ No newline at end of file

From 9ba7b6d444bbe1a739c31fb0422d9b60558e2ce9 Mon Sep 17 00:00:00 2001
From: Paul Wang
Date: Sun, 4 May 2025 12:35:14 +0000
Subject: [PATCH 02/10] Cancel the commit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 HI | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 HI

diff --git a/HI b/HI
deleted file mode 100644
index ae8d4cf..0000000
--- a/HI
+++ /dev/null
@@ -1 +0,0 @@
-ccccsdfaksdjfadjsflksjdf
\ No newline at end of file

From 84581447f0dab3deacad7f890e51a53e507ead3b Mon Sep 17 00:00:00 2001
From: Paul Wang
Date: Sun, 8 Jun 2025 15:39:02 +0800
Subject: [PATCH 03/10] Update dnn3.py

---
 dnn/dnn3.py | 27 +++++++++++----------------
 1 file changed, 11 insertions(+), 16 deletions(-)

diff --git a/dnn/dnn3.py b/dnn/dnn3.py
index b6b272e..bbdfdca 100644
--- a/dnn/dnn3.py
+++ b/dnn/dnn3.py
@@ -1,15 +1,18 @@
 from __future__ import print_function
-from sklearn.cross_validation import train_test_split
+#from sklearn.cross_validation import train_test_split
+from sklearn.model_selection import train_test_split
 import pandas as pd
 import numpy as np
 np.random.seed(1337) # for reproducibility
 from keras.preprocessing import sequence
-from keras.utils import np_utils
+#from keras.utils import np_utils
 from keras.models import Sequential
 from keras.layers import Dense, Dropout, Activation, Embedding
 from keras.layers import LSTM, SimpleRNN, GRU
 from keras.datasets import imdb
-from keras.utils.np_utils import to_categorical
+from keras.utils import to_categorical
+#from keras.utils.np_utils import to_categorical
+#from tensorflow.keras.utils import to_categorical
 from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error)
 from sklearn import metrics
 from sklearn.preprocessing import Normalizer
@@ -17,8 +20,8 @@
 from keras import callbacks
 from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger
 
-traindata = pd.read_csv('kdd/binary/Training.csv', header=None)
-testdata = pd.read_csv('kdd/binary/Testing.csv', header=None)
+traindata = pd.read_csv('dnn/kdd/binary/Training.csv', header=None)
+testdata = pd.read_csv('dnn/kdd/binary/Testing.csv', header=None)
 
 X = traindata.iloc[:,1:42]
 Y = traindata.iloc[:,0]
@@ -54,15 +57,7 @@
 # try using different optimizers and different optimizer configs
 model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
 
-checkpointer = callbacks.ModelCheckpoint(filepath="kddresults/dnn3layer/checkpoint-{epoch:02d}.hdf5", verbose=1, save_best_only=True, monitor='loss')
+checkpointer = callbacks.ModelCheckpoint(filepath="kddresults/dnn3layer/checkpoint-{epoch:02d}.keras", verbose=1, save_best_only=True, monitor='loss')
 csv_logger = CSVLogger('kddresults/dnn3layer/training_set_dnnanalysis.csv',separator=',', append=False)
-model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=100, callbacks=[checkpointer,csv_logger])
-model.save("kddresults/dnn3layer/dnn3layer_model.hdf5")
-
-
-
-
-
-
-
- 
+model.fit(X_train, y_train, batch_size=batch_size, epochs=100, callbacks=[checkpointer,csv_logger])
+model.save("dnn/chung_results/dnn3layer_model.hdf5")

From ae2bc796c1ad8bc78ab2a23ce4a10415776aa50c Mon Sep 17 00:00:00 2001
From: Paul Wang
Date: Sun, 8 Jun 2025 15:52:52 +0800
Subject: [PATCH 04/10] Update dnn3.py

---
 dnn/dnn3.py | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/dnn/dnn3.py b/dnn/dnn3.py
index bbdfdca..40e4487 100644
--- a/dnn/dnn3.py
+++ b/dnn/dnn3.py
@@ -20,8 +20,8 @@
 from keras import callbacks
 from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger
 
-traindata = pd.read_csv('dnn/kdd/binary/Training.csv', header=None)
-testdata = pd.read_csv('dnn/kdd/binary/Testing.csv', header=None)
+traindata = pd.read_csv('kdd/binary/Training.csv', header=None)
+testdata = pd.read_csv('kdd/binary/Testing.csv', header=None)
 
 X = traindata.iloc[:,1:42]
 Y = traindata.iloc[:,0]
@@ -59,5 +59,22 @@
 model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
 checkpointer = callbacks.ModelCheckpoint(filepath="kddresults/dnn3layer/checkpoint-{epoch:02d}.keras", verbose=1, save_best_only=True, monitor='loss')
 csv_logger = CSVLogger('kddresults/dnn3layer/training_set_dnnanalysis.csv',separator=',', append=False)
+
+# Create the necessary folders
+os.makedirs("kddresults/dnn3layer", exist_ok=True)
+os.makedirs("dnn/chung_results", exist_ok=True)
+
+# Set up callbacks
+checkpointer = callbacks.ModelCheckpoint(
+    filepath="kddresults/dnn3layer/checkpoint-{epoch:02d}.keras",
+    verbose=1,
+    save_best_only=True,
+    monitor='loss'
+)
+csv_logger = CSVLogger(
+    'kddresults/dnn3layer/training_set_dnnanalysis.csv',
+    separator=',',
+    append=False
+)
 model.fit(X_train, y_train, batch_size=batch_size, epochs=100, callbacks=[checkpointer,csv_logger])
 model.save("dnn/chung_results/dnn3layer_model.hdf5")

From cde9ab57aed61acb4be02c9db6844e2fdbb6ed04 Mon Sep 17 00:00:00 2001
From: Paul Wang
Date: Thu, 12 Jun 2025 15:42:48 +0800
Subject: [PATCH 05/10] Create mft.py

---
 dnn/mft.py | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 115 insertions(+)
 create mode 100644 dnn/mft.py

diff --git a/dnn/mft.py b/dnn/mft.py
new file mode 100644
index 0000000..9c63217
--- /dev/null
+++ b/dnn/mft.py
@@ -0,0 +1,115 @@
+import os
+import pandas as pd
+import numpy as np
+import tensorflow as tf
+from tensorflow.keras import Input, Model
+from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Activation, LayerNormalization, MultiHeadAttention, GlobalAveragePooling1D, Reshape
+from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger, EarlyStopping, Callback
+from sklearn.preprocessing import StandardScaler
+from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, classification_report, confusion_matrix
+
+
+np.random.seed(1337)
+tf.random.set_seed(1337)
+
+
+train_df = pd.read_csv('kdd/binary/Training.csv', header=None)
+test_df = pd.read_csv('kdd/binary/Testing.csv', header=None)
+
+X_train_raw = train_df.iloc[:, 1:42].values
+y_train = train_df.iloc[:, 0].values
+X_test_raw = test_df.iloc[:, 1:42].values
+y_test = test_df.iloc[:, 0].values
+
+scaler = StandardScaler()
+X_train = scaler.fit_transform(X_train_raw)
+X_test = scaler.transform(X_test_raw)
+
+
+os.makedirs("kddresults/transformer_mft", exist_ok=True)
+os.makedirs("dnn/chung_results_transformer", exist_ok=True)
+os.makedirs("dnnres", exist_ok=True)
+
+
+def build_mft_model(input_dim):
+    inputs = Input(shape=(input_dim,))
+    x = Dense(256)(inputs)
+    x = BatchNormalization()(x)
+    x = Activation('relu')(x)
+    x = Dropout(0.2)(x)
+    x = Reshape((1, 256))(x)
+
+    attn = MultiHeadAttention(num_heads=8, key_dim=256)(x, x)
+    x = LayerNormalization()(x + attn)
+
+    ff = Dense(512, activation='relu')(x)
+    mem = Dense(256, activation='tanh')(ff)
+    x = LayerNormalization()(x + mem)
+
+    ff2 = Dense(256, activation='relu')(x)
+    x = LayerNormalization()(x + ff2)
+
+    x = GlobalAveragePooling1D()(x)
+    x = Dropout(0.4)(x)
+    outputs = Dense(1, activation='sigmoid')(x)
+    return Model(inputs, outputs)
+
+
+def focal_loss_fixed(y_true, y_pred):
+    y_pred = tf.keras.backend.clip(y_pred, 1e-7, 1. - 1e-7)
+    pos = tf.cast(tf.equal(y_true, 1), tf.float32)
+    neg = 1.0 - pos
+    loss = - (0.3 * pos * tf.pow(1 - y_pred, 2) * tf.math.log(y_pred) +
+              0.7 * neg * tf.pow(y_pred, 2) * tf.math.log(1 - y_pred))
+    return tf.reduce_mean(loss)
+
+
+class MetricsLogger(Callback):
+    def on_epoch_end(self, epoch, logs=None):
+        if (epoch + 1) % 10 == 0:
+            y_pred = (self.model.predict(X_test) > 0.2).astype(int)
+            acc = accuracy_score(y_test, y_pred)
+            pre = precision_score(y_test, y_pred)
+            rec = recall_score(y_test, y_pred)
+            f1 = f1_score(y_test, y_pred)
+            print(f"\n[Epoch {epoch+1}] Accuracy: {acc:.4f}, Precision: {pre:.4f}, Recall: {rec:.4f}, F1: {f1:.4f}")
+
+
+model = build_mft_model(41)
+model.compile(loss=focal_loss_fixed, optimizer='adam', metrics=['accuracy'])
+
+checkpointer = ModelCheckpoint("kddresults/transformer_mft/checkpoint-{epoch:02d}.keras", save_best_only=True)
+csv_logger = CSVLogger("kddresults/transformer_mft/training_log.csv", separator=',', append=False)
+earlystop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
+
+model.fit(X_train, y_train,
+          batch_size=64,
+          epochs=100,
+          validation_split=0.3,
+          callbacks=[checkpointer, csv_logger, MetricsLogger(), earlystop],
+          verbose=1)
+
+
+model.save("dnn/chung_results_transformer/final_model.hdf5")
+
+scores, names = [], []
+for file in os.listdir("kddresults/transformer_mft"):
+    if file.endswith(".keras"):
+        model.load_weights(os.path.join("kddresults/transformer_mft", file))
+        y_pred = (model.predict(X_test) > 0.2).astype(int)
+        scores.append(f1_score(y_test, y_pred))
+        names.append(file)
+
+best_model = names[np.argmax(scores)]
+model.load_weights(os.path.join("kddresults/transformer_mft", best_model))
+y_pred = (model.predict(X_test) > 0.2).astype(int)
+y_proba = model.predict(X_test)
+
+np.savetxt("dnnres/transformer_predicted.txt", y_pred, fmt='%d')
+np.savetxt("dnnres/transformer_probability.txt", y_proba, fmt='%.6f')
+
+print("\n===== Final Evaluation on Best Transformer Checkpoint (Threshold = 0.2) =====")
+print("Confusion Matrix:")
+print(confusion_matrix(y_test, y_pred))
+print("\nClassification Report:")
+print(classification_report(y_test, y_pred, digits=4))

From db900d670bf0c1eb7c98cb51359e78233d79f0bc Mon Sep 17 00:00:00 2001
From: Paul Wang
Date: Thu, 12 Jun 2025 15:50:07 +0800
Subject: [PATCH 06/10] Update dnn3.py

---
 dnn/dnn3.py | 21 ++-------------------
 1 file changed, 2 insertions(+), 19 deletions(-)

diff --git a/dnn/dnn3.py b/dnn/dnn3.py
index 40e4487..bbdfdca 100644
--- a/dnn/dnn3.py
+++ b/dnn/dnn3.py
@@ -20,8 +20,8 @@
 from keras import callbacks
 from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger
 
-traindata = pd.read_csv('kdd/binary/Training.csv', header=None)
-testdata = pd.read_csv('kdd/binary/Testing.csv', header=None)
+traindata = pd.read_csv('dnn/kdd/binary/Training.csv', header=None)
+testdata = pd.read_csv('dnn/kdd/binary/Testing.csv', header=None)
 
 X = traindata.iloc[:,1:42]
 Y = traindata.iloc[:,0]
@@ -59,22 +59,5 @@
 model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
 checkpointer = callbacks.ModelCheckpoint(filepath="kddresults/dnn3layer/checkpoint-{epoch:02d}.keras", verbose=1, save_best_only=True, monitor='loss')
 csv_logger = CSVLogger('kddresults/dnn3layer/training_set_dnnanalysis.csv',separator=',', append=False)
-
-# Create the necessary folders
-os.makedirs("kddresults/dnn3layer", exist_ok=True)
-os.makedirs("dnn/chung_results", exist_ok=True)
-
-# Set up callbacks
-checkpointer = callbacks.ModelCheckpoint(
-    filepath="kddresults/dnn3layer/checkpoint-{epoch:02d}.keras",
-    verbose=1,
-    save_best_only=True,
-    monitor='loss'
-)
-csv_logger = CSVLogger(
-    'kddresults/dnn3layer/training_set_dnnanalysis.csv',
-    separator=',',
-    append=False
-)
 model.fit(X_train, y_train, batch_size=batch_size, epochs=100, callbacks=[checkpointer,csv_logger])
 model.save("dnn/chung_results/dnn3layer_model.hdf5")

From 0e692586348acb255aba4d524101aff434881238 Mon Sep 17 00:00:00 2001
From: Paul Wang
Date: Thu, 12 Jun 2025 15:51:49 +0800
Subject: [PATCH 07/10] Update dnn3.py

From 3147ec3fb62b2e71f1fe33d88044ef2f041d4ea3 Mon Sep 17 00:00:00 2001
From: Paul Wang
Date: Thu, 12 Jun 2025 15:54:05 +0800
Subject: [PATCH 08/10] Update dnn3.py

---
 dnn/dnn3.py | 49 +++++++++----------------------------------------
 1 file changed, 9 insertions(+), 40 deletions(-)

diff --git a/dnn/dnn3.py b/dnn/dnn3.py
index bbdfdca..0c080c9 100644
--- a/dnn/dnn3.py
+++ b/dnn/dnn3.py
@@ -1,18 +1,15 @@
 from __future__ import print_function
-#from sklearn.cross_validation import train_test_split
-from sklearn.model_selection import train_test_split
+from sklearn.cross_validation import train_test_split
 import pandas as pd
 import numpy as np
 np.random.seed(1337) # for reproducibility
 from keras.preprocessing import sequence
-#from keras.utils import np_utils
+from keras.utils import np_utils
 from keras.models import Sequential
 from keras.layers import Dense, Dropout, Activation, Embedding
 from keras.layers import LSTM, SimpleRNN, GRU
 from keras.datasets import imdb
-from keras.utils import to_categorical
-#from keras.utils.np_utils import to_categorical
-#from tensorflow.keras.utils import to_categorical
+from keras.utils.np_utils import to_categorical
 from sklearn.metrics import (precision_score, recall_score,f1_score, accuracy_score,mean_squared_error,mean_absolute_error)
 from sklearn import metrics
 from sklearn.preprocessing import Normalizer
@@ -20,44 +17,16 @@
 from keras import callbacks
 from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger
 
-traindata = pd.read_csv('dnn/kdd/binary/Training.csv', header=None)
-testdata = pd.read_csv('dnn/kdd/binary/Testing.csv', header=None)
+traindata = pd.read_csv('kdd/binary/Training.csv', header=None)
+testdata = pd.read_csv('kdd/binary/Testing.csv', header=None)
 
 X = traindata.iloc[:,1:42]
 Y = traindata.iloc[:,0]
-C = testdata.iloc[:,0]
-T = testdata.iloc[:,1:42]
-
-scaler = Normalizer().fit(X)
-trainX = scaler.transform(X)
-
-scaler = Normalizer().fit(T)
-testT = scaler.transform(T)
-
-y_train = np.array(Y)
-y_test = np.array(C)
-
-
-X_train = np.array(trainX)
-X_test = np.array(testT)
-
-
-batch_size = 64
-
-# 1. define the network
-model = Sequential()
-model.add(Dense(1024,input_dim=41,activation='relu'))
-model.add(Dropout(0.01))
-model.add(Dense(768,activation='relu'))
-model.add(Dropout(0.01))
-model.add(Dense(512,activation='relu'))
-model.add(Dropout(0.01))
-model.add(Dense(1))
-model.add(Activation('sigmoid'))
+ @@ -54,15 +57,7 @@
 # try using different optimizers and different optimizer configs
 model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
 
-checkpointer = callbacks.ModelCheckpoint(filepath="kddresults/dnn3layer/checkpoint-{epoch:02d}.keras", verbose=1, save_best_only=True, monitor='loss')
+checkpointer = callbacks.ModelCheckpoint(filepath="kddresults/dnn3layer/checkpoint-{epoch:02d}.hdf5", verbose=1, save_best_only=True, monitor='loss')
 csv_logger = CSVLogger('kddresults/dnn3layer/training_set_dnnanalysis.csv',separator=',', append=False)
-model.fit(X_train, y_train, batch_size=batch_size, epochs=100, callbacks=[checkpointer,csv_logger])
-model.save("dnn/chung_results/dnn3layer_model.hdf5")
+model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=100, callbacks=[checkpointer,csv_logger])
+model.save("kddresults/dnn3layer/dnn3layer_model.hdf5")

From f2f6bff17704898a826512a02b435b2e9dec292a Mon Sep 17 00:00:00 2001
From: Paul Wang
Date: Thu, 12 Jun 2025 15:55:49 +0800
Subject: [PATCH 09/10] Update dnn3.py

---
 dnn/dnn3.py | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/dnn/dnn3.py b/dnn/dnn3.py
index 0c080c9..3fdd5e4 100644
--- a/dnn/dnn3.py
+++ b/dnn/dnn3.py
@@ -22,7 +22,35 @@
 
 X = traindata.iloc[:,1:42]
 Y = traindata.iloc[:,0]
- @@ -54,15 +57,7 @@
+C = testdata.iloc[:,0]
+T = testdata.iloc[:,1:42]
+
+scaler = Normalizer().fit(X)
+trainX = scaler.transform(X)
+
+scaler = Normalizer().fit(T)
+testT = scaler.transform(T)
+
+y_train = np.array(Y)
+y_test = np.array(C)
+
+
+X_train = np.array(trainX)
+X_test = np.array(testT)
+
+
+batch_size = 64
+
+# 1. define the network
+model = Sequential()
+model.add(Dense(1024,input_dim=41,activation='relu'))
+model.add(Dropout(0.01))
+model.add(Dense(768,activation='relu'))
+model.add(Dropout(0.01))
+model.add(Dense(512,activation='relu'))
+model.add(Dropout(0.01))
+model.add(Dense(1))
+model.add(Activation('sigmoid'))
 # try using different optimizers and different optimizer configs
 model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
 

From 83c2f0a31e65063e46c8bf9fcd1edf7e2d619feb Mon Sep 17 00:00:00 2001
From: Paul Wang
Date: Thu, 12 Jun 2025 15:57:24 +0800
Subject: [PATCH 10/10] Update dnn3.py

---
 dnn/dnn3.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/dnn/dnn3.py b/dnn/dnn3.py
index 3fdd5e4..706d025 100644
--- a/dnn/dnn3.py
+++ b/dnn/dnn3.py
@@ -58,3 +58,10 @@
 csv_logger = CSVLogger('kddresults/dnn3layer/training_set_dnnanalysis.csv',separator=',', append=False)
 model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=100, callbacks=[checkpointer,csv_logger])
 model.save("kddresults/dnn3layer/dnn3layer_model.hdf5")
+
+
+
+
+
+
+