Nov 2, 2008
Data Mining just burned my nerves!
Right now, I am working on an assignment (a “project”, they call it) that applies data pre-processing techniques to cancer data. I wrote the code in C# the quick and dirty way… this is the worst code I have ever written in my life! I will post it here later. Wish me luck.
Here is the dirty code:
[sourcecode language='c#']using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using System.Collections;
namespace DataMining_001
{
class Program
{
/// <summary>
/// Tells whether a character may appear in a comma-separated list of
/// decimal numbers.
/// </summary>
/// <param name="test">The character to classify.</param>
/// <returns>
/// <c>true</c> for the ASCII digits '0'-'9', the comma, the minus sign and
/// the decimal point; <c>false</c> for every other character.
/// </returns>
static bool allowed(char test)
{
    // Explicit ASCII range on purpose: char.IsDigit would also accept
    // non-ASCII Unicode digits, which the original switch never did.
    return (test >= '0' && test <= '9')
        || test == ','
        || test == '-'
        || test == '.';
}
/// <summary>
/// Loads the comma-separated data set from "mydata.data", then for each
/// column: fills missing values with the column mean, sorts the values,
/// splits them into five equal-width bins and prints the mean of each bin
/// (smoothing by bin means).
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    const int rowCapacity = 182;   // allocated rows; one more than sampleCount to hold trailing tokens
    const int columnCount = 500;   // values per record
    const int sampleCount = 181;   // rows that take part in the statistics
    const int binCount = 5;

    double[,] table = LoadTable("mydata.data", rowCapacity, columnCount);

    Console.Clear();
    Console.WriteLine("Data Gathered … hit enter key to proceed");
    Console.Read();

    for (int column = 0; column < columnCount; column++)
    {
        List<double> values = new List<double>(sampleCount);
        for (int row = 0; row < sampleCount; row++)
        {
            values.Add(table[row, column]);
        }

        FillMissingWithMean(values);
        values.Sort();

        double min = values[0];
        double max = values[values.Count - 1];
        double width = (max - min) / binCount;

        List<double>[] bins = new List<double>[binCount];
        for (int b = 0; b < binCount; b++)
        {
            bins[b] = new List<double>();
        }
        foreach (double value in values)
        {
            bins[BinIndex(value, min, width, binCount)].Add(value);
        }

        Console.WriteLine("Count: ++++++++++" + column);
        for (int b = 0; b < binCount; b++)
        {
            ReportBin(b + 1, bins[b]);
        }

        // NOTE(review): the published listing cuts this statement off right
        // after the literal; printing the bare label is the minimal
        // completion -- confirm the intended output.
        Console.WriteLine("Count");
    }
}

/// <summary>
/// Reads the whole file, splits it on commas and stores the parsed numbers
/// row by row in a rowCapacity x columnCount table.
/// </summary>
/// <remarks>
/// The class labels "ADCA" and "Mesothelioma" are stripped from a token
/// before parsing. A token that is empty after stripping and trimming is
/// stored as <see cref="double.NaN"/> to mark it as missing.
/// </remarks>
/// <exception cref="InvalidDataException">
/// The file holds more tokens than the table has cells.
/// </exception>
static double[,] LoadTable(string path, int rowCapacity, int columnCount)
{
    // File.ReadAllText opens, reads and closes the file in one call, so no
    // reader is left undisposed.
    string contents = File.ReadAllText(path);

    double[,] table = new double[rowCapacity, columnCount];
    int row = 0;
    int col = 0;

    foreach (string token in contents.Split(','))
    {
        if (row >= rowCapacity)
        {
            throw new InvalidDataException(
                "Input holds more than " + (rowCapacity * columnCount) + " values.");
        }

        // Only commas are split on, so a token can still carry line breaks
        // or a class label; remove both before parsing.
        string text = token.Replace("ADCA", "").Replace("Mesothelioma", "").Trim();

        // Invariant culture: the data uses '.' as decimal separator no
        // matter which locale the program runs under.
        table[row, col] = text.Length == 0
            ? double.NaN
            : double.Parse(text, System.Globalization.CultureInfo.InvariantCulture);

        col++;
        if (col == columnCount)
        {
            col = 0;
            row++;
        }
    }

    return table;
}

/// <summary>
/// Replaces every missing (NaN) entry with the mean of the entries that are
/// present. If no entry is present the list is left as all-NaN.
/// </summary>
static void FillMissingWithMean(List<double> values)
{
    double sum = 0;
    int present = 0;
    foreach (double value in values)
    {
        if (!double.IsNaN(value))
        {
            sum += value;
            present++;
        }
    }

    // 0.0 / 0 is NaN in floating point, so an all-missing column does not throw.
    double mean = sum / present;

    for (int i = 0; i < values.Count; i++)
    {
        if (double.IsNaN(values[i]))
        {
            values[i] = mean;
        }
    }
}

/// <summary>
/// Returns the zero-based index of the equal-width bin that holds
/// <paramref name="value"/>. Bin k covers values up to and including
/// min + width * (k + 1); everything above the last boundary (and NaN)
/// lands in the last bin.
/// </summary>
static int BinIndex(double value, double min, double width, int binCount)
{
    for (int k = 1; k < binCount; k++)
    {
        // Boundaries are offset by min so the bins span [min, max]
        // instead of starting at zero.
        if (value <= min + width * k)
        {
            return k - 1;
        }
    }

    return binCount - 1;
}

/// <summary>
/// Prints the bin header and the bin mean, and overwrites every value in
/// the bin with that mean. An empty bin reports NaN.
/// </summary>
static void ReportBin(int binNumber, List<double> bin)
{
    Console.WriteLine("BIN " + binNumber);

    double sum = 0;
    foreach (double value in bin)
    {
        sum += value;
    }

    double mean = sum / bin.Count;

    // Smoothing by bin means; the caller currently only uses the printed mean.
    for (int i = 0; i < bin.Count; i++)
    {
        bin[i] = mean;
    }

    Console.WriteLine("M= " + mean);
}
}
}[/sourcecode]










Hi there,
Thank you! I would now go on this blog every day!
Thanks
AnnaHopn