# https://www.kaggle.com/code/kkhandekar/image-classification-using-sklearn-randomforest/notebook
#
# Image classification with a scikit-learn random forest:
#   1. read every class sub-folder, resize the images and pickle them,
#   2. show one sample image per class,
#   3. fit a baseline RandomForestClassifier on the flattened pixels,
#   4. tune hyper-parameters with RandomizedSearchCV and score the tuned model.

import os
import pprint
import warnings
from collections import Counter
from pprint import pprint

# Suppress warnings before the plotting / ML libraries are imported.
warnings.simplefilter('ignore')

import cv2
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from skimage.io import imread
from skimage.transform import rescale
from skimage.transform import resize
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.model_selection import train_test_split


def resize_all(src, pklname, include, width=150, height=None):
    """Load, resize and pickle the images found in selected sub-directories.

    Args:
        src: Directory whose sub-directories each hold the images of one class.
        pklname: Base name of the output file; ``_{width}x{height}px.pkl``
            is appended to it.
        include: Collection of sub-directory names to read; all other
            entries of ``src`` are skipped.
        width: Target image width in pixels.
        height: Target image height in pixels; defaults to ``width``.

    Returns:
        The path of the pickle file written with ``joblib.dump``. The pickled
        dict has the keys ``description``, ``label``, ``filename`` and
        ``data`` (the last three are parallel lists).
    """
    height = height if height is not None else width

    data = {
        'description': 'resized ({0}*{1}) mini dog images in rgb'.format(
            int(width), int(height)),
        'label': [],
        'filename': [],
        'data': [],
    }
    pklname = f"{pklname}_{width}x{height}px.pkl"
    image_extensions = ('.jpg', '.jpeg', '.png')

    for subdir in os.listdir(src):
        current_path = os.path.join(src, subdir)
        # Skip classes that were not requested and stray files next to the
        # class folders (os.listdir on a file would raise).
        if subdir not in include or not os.path.isdir(current_path):
            continue

        print(f"Reading images for {subdir} ...")
        for file in os.listdir(current_path):
            # Case-insensitive match so 'IMG.JPG' and '.jpeg' are not dropped.
            if not file.lower().endswith(image_extensions):
                continue
            im = imread(os.path.join(current_path, file))
            # skimage takes output_shape as (rows, cols) == (height, width).
            im = resize(im, (height, width))
            data['label'].append(subdir)
            data['filename'].append(file)
            data['data'].append(im)

    joblib.dump(data, pklname)
    return pklname


# Raw string: the backslashes of the Windows path must not be read as escapes.
IMAGE_PATH = r'D:\A\Dat\AnoPyTest\kkk\MiniDogBreedData'
CLASSES = os.listdir(IMAGE_PATH)
BASE_NAME = 'mini_dog_breeds'
WIDTH = 90

# Load & resize the images
pkl_path = resize_all(src=IMAGE_PATH, pklname=BASE_NAME, width=WIDTH, include=CLASSES)
data = joblib.load(pkl_path)

print('number of samples: ', len(data['data']))
print('keys: ', list(data.keys()))
print('description: ', data['description'])
print('image shape: ', data['data'][0].shape)
print('labels: ', np.unique(data['label']))
print(Counter(data['label']))

# Show the first image of every class side by side.
labels = np.unique(data['label'])
# squeeze=False keeps `axes` an array even when there is a single class.
fig, axes = plt.subplots(1, len(labels), squeeze=False)
fig.set_size_inches(15, 4)
fig.tight_layout()
for ax, label in zip(axes.ravel(), labels):
    idx = data['label'].index(label)
    ax.imshow(data['data'][idx])
    ax.axis('off')
    ax.set_title(label)

x = np.array(data['data'])
y = np.array(data['label'])

SIZE = 0.1
# NOTE: the split seed is itself drawn at random, so results vary per run.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=SIZE, shuffle=True, random_state=np.random.randint(1, 50))
print(f"Training Size: {x_train.shape[0]}\n Validation Size: {x_test.shape[0]}")

# skimage.transform.resize already converts images to floats in [0, 1]
# (preserve_range defaults to False), so no further /255 scaling is applied.

# Flatten every image into one feature vector per sample.
x_train2 = x_train.reshape((x_train.shape[0], -1))
x_test2 = x_test.reshape((x_test.shape[0], -1))

# Baseline: random forest with default hyper-parameters.
rfc = RandomForestClassifier()
rfc.fit(x_train2, y_train)
y_pred = rfc.predict(x_test2)

acc = '{:.2%}'.format(accuracy_score(y_test, y_pred))
print(f"Accuracy for Random Forrest: {acc}")

# Hyper-parameter search space.
n_estimators = [int(n) for n in np.linspace(start=200, stop=1000, num=3)]
criterion = ['gini', 'entropy']
max_depth = [int(d) for d in np.linspace(10, 110, num=3)]
max_depth.append(None)
min_samples_split = [2, 5, 10]
min_samples_leaf = [1, 2, 4]
bootstrap = [True, False]
class_weight = ['balanced', 'balanced_subsample', None]

param_grid = {
    'n_estimators': n_estimators,
    'criterion': criterion,
    'max_depth': max_depth,
    'min_samples_split': min_samples_split,
    'min_samples_leaf': min_samples_leaf,
    'bootstrap': bootstrap,
    'class_weight': class_weight,
}
pprint(param_grid)

rfc_t = RandomForestClassifier()
rf_random = RandomizedSearchCV(
    estimator=rfc_t,
    param_distributions=param_grid,
    n_iter=10,
    cv=3,
    verbose=0,
    random_state=42,
    n_jobs=-1,
)
rf_random.fit(x_train2, y_train)
pprint(rf_random.best_params_)

# Score the tuned model: with the default refit=True the search has already
# refitted best_estimator_ on the whole training set using best_params_.
rfc = rf_random.best_estimator_
y_pred = rfc.predict(x_test2)

# accuracy score
acc = '{:.1%}'.format(accuracy_score(y_test, y_pred))
print(f"Accuracy for Random Forrest: {acc}")
Wednesday, January 24, 2024
ML: Image classification using Sklearn (RandomForest)
Labels:
ML
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment