diff --git a/ageUpscaling/dataloaders/base.py b/ageUpscaling/dataloaders/base.py index 3644dc92dae59e3292e762a378accb1f608610a4..5e4e30fda56418a9dc24eb22de2621bd361fcf1e 100644 --- a/ageUpscaling/dataloaders/base.py +++ b/ageUpscaling/dataloaders/base.py @@ -57,7 +57,9 @@ class MLData: return X.to_array().transpose('plot', 'sample', 'variable').values def get_y(self, - target)-> Tuple[ArrayLike, ArrayLike, int]: + target, + method, + max_forest_age)-> Tuple[ArrayLike, ArrayLike, int]: """ Method to get the target. @@ -66,9 +68,19 @@ class MLData: ------ y (np.array): the target """ - Y = xr.open_dataset(self.cube_path).sel(spatial_cluster = self.subset)[target] + Y = np.round(xr.open_dataset(self.cube_path).sel(spatial_cluster = self.subset)[target]) - return Y.to_array().values + if method == 'MLPClassifier': + Y = Y.to_array().values + mask_old = Y== max_forest_age + mask_young = Y<max_forest_age + Y[mask_old] = 1 + Y[mask_young] = 0 + + elif method == 'MLPRegressor': + Y = Y.where(Y<max_forest_age).to_array().values + + return Y def standardize(self, x): return (x - np.nanmean(x)) / np.nanstd(x) @@ -76,11 +88,13 @@ class MLData: def get_xy(self, standardize:bool=True): - self.y = self.get_y(target=self.data_config['target']).reshape(-1) + self.y = self.get_y(target=self.data_config['target'], + method = self.data_config['method'][0], + max_forest_age =self.data_config['max_forest_age'][0]).reshape(-1) self.x = self.get_x(features= self.data_config['features'], standardize= standardize).reshape(-1, len(self.data_config['features'])) mask_nan = (np.all(np.isfinite(self.x), axis=1)) & (np.isfinite(self.y)) self.x, self.y = self.x[mask_nan, :], self.y[mask_nan] - - return {'features' : self.x.astype('float32'), "target": self.y.astype('float32')} + + return {'features' : self.x.astype('float32'), "target": self.y.astype('int')} \ No newline at end of file diff --git a/ageUpscaling/methods/MLP.py b/ageUpscaling/methods/MLP.py index e6ded36947def56457c76290a222a0b4c7c55ed1..956d80085f1cc0c931b1a5cb7fe95d9b5f73b6d7 100644 --- a/ageUpscaling/methods/MLP.py +++ b/ageUpscaling/methods/MLP.py @@ -69,7 +69,6 @@ class MLPmethod: def train(self, cube_path:np.array = [], - mlp_method:str = 'MLPRegressor', train_subset:dict={}, valid_subset:dict={}) -> None: @@ -78,24 +77,23 @@ class MLPmethod: train_subset=train_subset, valid_subset=valid_subset) - if not os.path.exists(self.save_dir + '/save_model/{method}/'.format(method = mlp_method)): - os.makedirs(self.save_dir + '/save_model/{method}/'.format(method = mlp_method)) + if not os.path.exists(self.save_dir + '/save_model/{method}/'.format(method = self.data_config['method'][0])): + os.makedirs(self.save_dir + '/save_model/{method}/'.format(method = self.data_config['method'][0])) study = optuna.create_study(study_name = 'hpo_ForestAge', - storage='sqlite:///' + self.save_dir + '/save_model/{method}/hp_trial.db'.format(method = mlp_method), + storage='sqlite:///' + self.save_dir + '/save_model/{method}/hp_trial.db'.format(method = self.data_config['method'][0]), pruner= optuna.pruners.SuccessiveHalvingPruner(min_resource='auto', reduction_factor=4, min_early_stopping_rate=0), direction='minimize') - study.optimize(lambda trial: self.hp_search(trial, mlp_method, self.data_config, mldata, self.save_dir), + study.optimize(lambda trial: self.hp_search(trial, self.data_config, mldata, self.save_dir), n_trials=300, n_jobs=4) - with open(self.save_dir + "/'/save_model/{method}/model_trial_{id_}.pickle".format(method = mlp_method, id_ = study.best_trial.number), "rb") as fin: + with open(self.save_dir + "/'/save_model/{method}/model_trial_{id_}.pickle".format(method = self.data_config['method'][0], id_ = study.best_trial.number), "rb") as fin: self.best_model = pickle.load(fin) def hp_search(self, trial: optuna.Trial, - mlp_method:str, data_config:dict, mldata:dict, save_dir:str) -> float: @@ -104,13 +102,16 @@ class MLPmethod: 'learning_rate_init': trial.suggest_float('learning_rate_init ', data_config['hyper_params']['learning_rate_init']['min'], data_config['hyper_params']['learning_rate_init']['max'], step=data_config['hyper_params']['learning_rate_init']['step']), 'first_layer_neurons': trial.suggest_int('first_layer_neurons', data_config['hyper_params']['first_layer_neurons']['min'], data_config['hyper_params']['first_layer_neurons']['max'], step=data_config['hyper_params']['first_layer_neurons']['step']), 'second_layer_neurons': trial.suggest_int('second_layer_neurons', data_config['hyper_params']['second_layer_neurons']['min'], data_config['hyper_params']['second_layer_neurons']['max'], step=data_config['hyper_params']['second_layer_neurons']['step']), + 'third_layer_neurons': trial.suggest_int('third_layer_neurons', data_config['hyper_params']['third_layer_neurons']['min'], data_config['hyper_params']['third_layer_neurons']['max'], step=data_config['hyper_params']['third_layer_neurons']['step']), 'activation': trial.suggest_categorical('activation', data_config['hyper_params']['activation']), 'batch_size': trial.suggest_int('batch_size', data_config['hyper_params']['batch_size']['min'], data_config['hyper_params']['batch_size']['max'], step=data_config['hyper_params']['batch_size']['step'])} - if mlp_method == "MLPRegressor": + if self.data_config['method'][0] == "MLPRegressor": model_ = MLPRegressor( hidden_layer_sizes=(hyper_params['first_layer_neurons'], - hyper_params['second_layer_neurons']), + hyper_params['second_layer_neurons'], + hyper_params['third_layer_neurons'], + ), learning_rate_init=hyper_params['learning_rate_init'], activation=hyper_params['activation'], batch_size=hyper_params['batch_size'], @@ -118,7 +119,7 @@ class MLPmethod: max_iter=100, early_stopping= True, validation_fraction = 0.3) - elif mlp_method == "MLPClassifier": + elif self.data_config['method'][0] == "MLPClassifier": model_ = MLPClassifier( hidden_layer_sizes=(hyper_params['first_layer_neurons'], hyper_params['second_layer_neurons']), @@ -132,15 +133,15 @@ class MLPmethod: model_.fit(mldata.train_dataloader().get_xy()['features'], mldata.train_dataloader().get_xy()['target']) - with open(save_dir + "/save_model/{method}/model_trial_{id_}.pickle".format(method = mlp_method, id_ = trial.number), "wb") as fout: + with open(save_dir + "/save_model/{method}/model_trial_{id_}.pickle".format(method = self.data_config['method'][0], id_ = trial.number), "wb") as fout: pickle.dump(model_, fout) if trial.should_prune(): raise optuna.exceptions.TrialPruned() - if mlp_method == "MLPRegressor": + if self.data_config['method'][0] == "MLPRegressor": loss_ = mean_squared_error(mldata.val_dataloader().get_xy()['target'], model_.predict(mldata.val_dataloader().get_xy()['features']), squared=False) - if mlp_method == "MLPClassifier": + if self.data_config['method'][0] == "MLPClassifier": loss_ = log_loss(mldata.val_dataloader().get_xy()['target'], model_.predict(mldata.val_dataloader().get_xy()['features'])) return loss_ diff --git a/dev/training_MLP.py b/dev/training_MLP.py index e76199411c026a2ab378e5ac0b616d7c41c0202d..2fee4dcdb6dd15f22a9fb41b539416d9692e08d0 100644 --- a/dev/training_MLP.py +++ b/dev/training_MLP.py @@ -17,5 +17,4 @@ data_config_path= "/home/simon/Documents/science/GFZ/projects/forest_age_upscale #%% Run training mlp_method = MLPmethod(save_dir="/home/simon/Documents/science/GFZ/projects/forest_age_upscale/output/test_train/", data_config_path= data_config_path) - -mlp_method.train(cube_path=cube_path, train_subset=train_subset,valid_subset=valid_subset) \ No newline at end of file +mlp_method.train(cube_path=cube_path, train_subset=train_subset,valid_subset=valid_subset) diff --git a/experiments/data_config.yaml b/experiments/data_config.yaml index 08d209f85104d40c9d696bafa302eab40dfd5698..fe973c314414937a497cf7b491a8d973007887bc 100644 --- a/experiments/data_config.yaml +++ b/experiments/data_config.yaml @@ -1,4 +1,3 @@ -# Features features: - agb - AnnualMeanTemperature @@ -24,10 +23,13 @@ features: - AnnualWind - AnnualVapr -# Target target: - age - + +method: + - MLPClassifier +max_forest_age: + - 300 hyper_params: learning_rate_init: min: 0.0001 @@ -41,6 +43,10 @@ hyper_params: min: 10 max: 100 step: 10 + third_layer_neurons: + min: 10 + max: 100 + step: 10 activation: ['identity', 'tanh', 'relu'] batch_size: min: 16 diff --git a/setup.py b/setup.py index f7d5df71eb5b1127f317a56f610829e03355dfd9..978c57484aa2277f58b7dda4a5bd735c01205c63 100644 --- a/setup.py +++ b/setup.py @@ -52,7 +52,8 @@ setuptools.setup( "Boruta==0.3", "numpy==1.23.4", "netCDF4==1.6.1", - "optuna==3.0.3" - ], + "optuna==3.0.3", + "rasterio==1.3.3", + "matplotlib==3.6.2"], include_package_data=True, )