识别手写数字,并集成到 Django Web 应用中

清华大佬耗费三个月吐血整理的几百G的资源,免费分享!....>>>

# import the necessary packages
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.svm import LinearSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.cross_validation import train_test_split
from sklearn.metrics import classification_report
from sklearn import datasets
from sklearn.decomposition import PCA as pca
from nolearn.dbn import DBN
from matplotlib import pyplot
from PIL import Image
import numpy as np
import scipy

# Target image size in pixels. getimgdata() passes it to resize() and
# saveImage() passes it to np.reshape(), so one image is 28 * 28 = 784 values.
# NOTE(review): presumably matches the number of pixel columns in
# data/train.csv -- confirm against the data file.
STANDARD_SIZE = (28, 28)

class DigitProphet(object):
    """Digit classifier that is trained as soon as it is constructed.

    The constructor reads a CSV file whose first column is used as the target
    value and whose remaining columns are used as the features (scaled by
    1/255), fits a ``DBN`` on a small random subset of the rows and stores the
    fitted model in ``self.clf``.

    Other estimators that were tried here and can be swapped in for the DBN
    (all of them are imported at the top of the file):
    ``RandomForestClassifier()``, ``SGDClassifier()``, ``LinearSVC()`` and
    ``KNeighborsClassifier(n_neighbors=13)``.
    """

    def __init__(self, train_path="data/train.csv"):
        """Load the training data and fit the classifier.

        Args:
            train_path: Path of the training CSV. Defaults to the location the
                original code had hard-coded.
        """
        train = pd.read_csv(train_path)
        # DataFrame.as_matrix() was removed in pandas 1.0; .values returns the
        # same ndarray and exists in old and new pandas releases alike.
        data_train = train.values
        values_train = data_train[:, 0]
        images_train = data_train[:, 1:]

        # test_size=0.995 keeps only 0.5% of the rows for training; the
        # held-out part is discarded.
        trainX, _unused_x, trainY, _unused_y = train_test_split(
            images_train / 255., values_train, test_size=0.995)

        # Neural network: the list is passed as the layer sizes, starting with
        # one input unit per feature column.
        self.clf = DBN(
            [trainX.shape[1], 300, 10],
            learn_rates=0.3,
            learn_rate_decays=0.9,
            epochs=10,
            verbose=1,
        )

        # Training
        self.clf.fit(trainX, trainY)

    def predictImage(self, array):
        """Return the classifier's prediction for one sample.

        Args:
            array: Feature values of the sample. A flat sequence is promoted
                to a single-row 2-D array before it is handed to the model.

        Returns:
            The first element of ``self.clf.predict(...)``.
        """
        image = np.atleast_2d(array)
        return self.clf.predict(image)[0]


def trim(image):
    """Crop *image* to its non-white content and centre it on a white square.

    A pixel counts as content when at least one of its colour channels is
    below 255. The bounding box of all content pixels is cut out and pasted
    into the middle of a new white RGBA square whose side is 1.3 times the
    larger bounding-box extent, which leaves a margin around the content.

    Args:
        image: A PIL image (or anything ``np.array`` turns into a
            rows x columns x channels array) with colour channels first and
            an optional alpha channel last.

    Returns:
        A new square PIL image in mode ``'RGBA'``.

    Raises:
        ValueError: If the image has no non-white pixel, so there is nothing
            to crop to.
    """
    image_data = np.array(image)
    # Look at the colour channels only. alpha_to_color() leaves alpha at 0 on
    # the pixels it whitens, so including alpha here would make every
    # transparent pixel look like content and nothing would be trimmed.
    darkest = image_data[:, :, :3].min(axis=2)
    rows = np.where(darkest.min(axis=1) < 255)[0]
    cols = np.where(darkest.min(axis=0) < 255)[0]
    if rows.size == 0:
        raise ValueError("trim(): image contains no non-white pixels")

    # np.where returns indices in ascending order.
    row_min, row_max = int(rows[0]), int(rows[-1])
    col_min, col_max = int(cols[0]), int(cols[-1])

    extent = max(row_max - row_min, col_max - col_min)
    # Same size as before for normal inputs; the extent + 1 floor only kicks
    # in for tiny crops, where int(extent * 1.3) would be smaller than the
    # crop itself.
    size = max(int(extent * 1.3), extent + 1)

    cropped = Image.fromarray(
        image_data[row_min:row_max + 1, col_min:col_max + 1, :])
    img_w, img_h = cropped.size

    background = Image.new('RGBA', (size, size), (255, 255, 255, 255))
    # Floor division: paste() needs integer coordinates on Python 3 as well.
    offset = ((size - img_w) // 2, (size - img_h) // 2)
    background.paste(cropped, offset)
    return background

def getimgdata(filename):
    """Load an image file and turn it into classifier input.

    Steps: open the file, whiten fully transparent pixels, trim to the
    content, convert to greyscale, resize to STANDARD_SIZE, scale to 0..1 and
    invert so that white becomes 0 and black becomes 1.

    Args:
        filename: Path (or file object) accepted by ``Image.open``.

    Returns:
        A flat list of floats, one per pixel of the resized image, in
        row-major order.
    """
    # alpha_to_color() unpacks exactly four channels, so normalise the mode
    # first; this lets RGB, palette and greyscale files through as well.
    img = Image.open(filename).convert('RGBA')
    img = alpha_to_color(img)
    img = trim(img)
    img = img.convert('L')
    # Resize through the public Image API before extracting the pixels; the
    # previous code called resize() on the internal object from getdata().
    # NOTE(review): NEAREST is passed explicitly because the internal call is
    # believed to have defaulted to nearest-neighbour -- confirm.
    img = img.resize(STANDARD_SIZE, Image.NEAREST)
    pixels = np.array(img, dtype=float).ravel() / 255.
    return list(1 - pixels)
	
def alpha_to_color(image, color=(255, 255, 255)):
    """Paint every fully transparent pixel of an RGBA image with *color*.

    Only pixels whose alpha value is exactly 0 are touched, and only their
    red, green and blue values change; the alpha channel is carried over
    as it is.

    Args:
        image: Four-channel image convertible with ``np.array``.
        color: Sequence whose first three items are the replacement red,
            green and blue values.

    Returns:
        A new PIL image in mode ``'RGBA'``.
    """
    pixels = np.array(image)
    red, green, blue, alpha = np.rollaxis(pixels, axis=-1)
    transparent = alpha == 0
    for index, channel in enumerate((red, green, blue)):
        channel[transparent] = color[index]
    recombined = np.dstack([red, green, blue, alpha])
    return Image.fromarray(recombined, 'RGBA')
		
def saveImage(array, path='outfile.jpg'):
    """Write a flat array of 0..1 pixel values to an image file.

    Args:
        array: Sequence with STANDARD_SIZE[0] * STANDARD_SIZE[1] values, in
            the 0..1 scale used for the training data.
        path: Destination file; the format is chosen from the extension.

    scipy.misc.imsave was removed in SciPy 1.2, so the file is written with
    PIL instead. Values are stored as they are (rounded and clipped to
    0..255); they are not contrast-stretched to the full range.
    """
    # Get the data back into its two-dimensional form.
    matrix = np.reshape(np.asarray(array, dtype=float), STANDARD_SIZE)
    # Get the original pixel values back as 8-bit integers.
    pixels = np.clip(np.rint(matrix * 255.), 0, 255).astype(np.uint8)
    Image.fromarray(pixels).save(path)
		

# Module-level classifier instance. DigitProphet's constructor reads the
# training CSV and fits the model, so this work happens at import time.
dp=DigitProphet()
# NOTE(review): not referenced anywhere else in the visible file; presumably
# used by code that imports this module -- confirm before removing.
pointer=0

def main():
    """Script entry point; currently does nothing.

    Disabled usage example kept for reference:
      1. data = getimgdata("imageToSave.png")
      2. saveImage(data)
      3. preds = dp.predictImage(data), then print preds
    """
    return None


if __name__ == '__main__':
    main()