Skip to content
Snippets Groups Projects
Commit fad95a5c authored by caleb.biggs's avatar caleb.biggs
Browse files

Done

parents
Branches master
No related tags found
No related merge requests found
This diff is collapsed.
main.py 0 → 100644
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans
from sklearn.pipeline import Pipeline
from sklearn.metrics import silhouette_score
#Categorizes Pokemon of a given type into the given number of clusters
def categorize(dataByType, type1, numClusters):
    """Cluster the rows of one type with k-means and score the result.

    The 'Name', 'Type 1' and 'Type 2' columns are dropped; the remaining
    columns (assumed numeric -- TODO confirm against the CSV) are min-max
    scaled and clustered with KMeans.

    Args:
        dataByType: Mapping from a type key to the DataFrame of its rows.
        type1: Key into ``dataByType`` selecting the rows to cluster.
        numClusters: Number of clusters to fit.

    Returns:
        A tuple ``(numClusters, score, predictions)`` where ``score`` is the
        silhouette score of the clustering and ``predictions`` holds one
        cluster label per row, in row order.
    """
    toDrop = ['Name', 'Type 1', 'Type 2']
    typeData = dataByType[type1].drop(columns=toDrop)

    pipe = Pipeline([
        ('scale', MinMaxScaler()),
        ('cluster', KMeans(n_clusters=numClusters, n_init=10)),
    ])
    pipe.fit(typeData)
    predictions = pipe.predict(typeData)

    # Score in the same scaled feature space that KMeans clustered in;
    # scoring the raw columns would let wide-ranged columns dominate the
    # distances and rank clusterings by a metric the model never saw.
    scaledData = pipe.named_steps['scale'].transform(typeData)
    return (numClusters, silhouette_score(scaledData, predictions), predictions)
# Let pandas auto-detect the display width and lift the row limit so the
# cluster DataFrames printed later are shown in full.
pd.set_option('display.width', None)
pd.set_option('display.max_rows', None)

#Read in data, extract a list of types, and partition the data by type
data = pd.read_csv("Pokemon.csv")
types = data["Type 1"].unique()

# One groupby pass replaces the row-by-row concat (which re-copied the
# growing frame for every row). sort=False keeps types in first-appearance
# order, and each group keeps its rows in file order with original index.
# NOTE(review): rows with a missing 'Type 1' are not grouped -- confirm the
# CSV has none.
dataByType = {
    type1: members
    for type1, members in data.groupby("Type 1", sort=False)
}
#Get the clustering data for each type and print it
# typePredictions maps each type to (cluster count, per-row cluster labels)
# for the cluster count that achieved the highest silhouette score.
typePredictions = {}
for type1 in types:
    numMembers = len(dataByType[type1])
    bestNum = 0
    # Silhouette scores can be negative, so start below any real score;
    # starting at 0 would leave the type without predictions in that case.
    bestScore = float("-inf")
    print(f"{type1}\n-----------")
    # Try 2..14 clusters, but always fewer clusters than rows.
    for i in range(2, min(15, numMembers)):
        output = categorize(dataByType, type1, i)
        print(f"{output[0]} clusters: {output[1]}")
        if output[1] > bestScore:
            bestNum = output[0]
            bestScore = output[1]
            typePredictions[type1] = (output[0], output[2])
    if type1 not in typePredictions:
        # Too few rows to score any clustering: put every row in a single
        # cluster so the later stages still find an entry for this type.
        bestNum = 1
        bestScore = 0
        typePredictions[type1] = (bestNum, [0] * numMembers)
    print(f"best number of clusters: {bestNum}\nbest score: {bestScore}\n")
#Create dataframes from the best categories from the previous step
# typeClusters maps type -> {cluster label -> DataFrame of that cluster's rows}.
typeClusters = {}
for type1 in types:
    labels = typePredictions[type1][1]
    members = dataByType[type1]

    # Collect the row positions belonging to each label in one pass, then
    # slice each cluster out once instead of concatenating row by row.
    positionsByLabel = {}
    for position, label in enumerate(labels):
        positionsByLabel.setdefault(label, []).append(position)

    typeClusters[type1] = {
        label: members.iloc[positions]
        for label, positions in positionsByLabel.items()
    }
#Print full details of the best clusters
# Columns whose per-cluster mean is reported, in output order.
statColumns = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']
for type1 in types:
    print(f"\n{type1}\n-----")
    # Walk the labels that actually exist rather than assuming they are
    # exactly 0..k-1, so a missing label cannot raise KeyError.
    for i in sorted(typeClusters[type1]):
        cluster = typeClusters[type1][i]
        print(f"Cluster {i}\n{cluster}")
        print("\n".join(
            f"Mean {column}: {cluster.loc[:, column].mean()}"
            for column in statColumns
        ))
        print()
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment