Open Source

Welcome!

Please use the source code provided here in whatever way you see fit. If you need IT help beyond what is openly provided here, we offer services on request, as described on the Services page.

Python Code for Computing Species Diversity


# This code computes the Shannon-Wiener Diversity Index, the Maximum Possible Diversity, Evenness, Species Richness
# and Total Abundance.
# It plots the Diversity Index (H), Evenness, Species Richness (S) and Total Abundance.

import pandas as pd
import numpy as np
from matplotlib.ticker import StrMethodFormatter
from pathlib import Path
from matplotlib.figure import Figure
import matplotlib.pyplot as plt
import math as mat
# ---------------------------------------------------------------------------
# Reads a tab-delimited abundance table and, for every data row, computes:
#   * index      - Shannon-Wiener diversity, H = -sum(p_i * ln(p_i))
#   * sR         - species richness S (number of non-zero abundances)
#   * totalAbund - sum of all abundances in the row
#   * eve        - evenness, H / ln(S)
# The four series are then plotted against the first column of each row.
#
# Expected file layout (as read by the code below): the first line is a
# header whose columns after the first are species names; every following
# line starts with a sample label (stored in `depth`) followed by one
# numeric abundance per species.
# ---------------------------------------------------------------------------
depth = []        # first column of each data row, kept as text
species = []      # species names taken from the header row
totalAbund = []   # total abundance per row
index = []        # Shannon-Wiener diversity index (H) per row
eve = []          # evenness per row
sR = []           # species richness (S) per row

# `with` guarantees the file is closed even if a row fails to parse.
with open(r'C:\\Users\Emil\PycharmProjects\pythonTurtle\Data\HydroCrbVentsDataSet.txt', "r") as f:
    linesplit = f.readline().split("\t")
    # Column 0 of the header is the label of the depth column, not a species.
    species = [name.strip() for name in linesplit[1:]]

    for OneLine in f:
        if not OneLine.strip():
            # Skip blank lines (typically a trailing newline at end of file).
            continue
        LN = OneLine.split("\t")
        depth.append(LN[0])

        counts = np.array([float(cell.strip()) for cell in LN[1:]], dtype=float)
        if (counts < 0).any():
            # ln(p) is undefined for negative proportions.
            raise ValueError("negative abundance in row " + repr(LN[0]))

        total = counts.sum()
        present = counts[counts != 0]
        richness = int(present.size)

        if total != 0:
            proportions = present / total
            # abs() instead of unary minus so a single-species row gives
            # 0.0 rather than -0.0.
            h = abs(np.sum(proportions * np.log(proportions)))
        else:
            # Empty sample: no individuals, so no diversity.
            h = 0.0

        # Evenness is H / Hmax with Hmax = ln(S). For S <= 1, Hmax is 0 (or
        # undefined), so evenness is reported as 0 instead of dividing by zero.
        if richness > 1:
            evenness = h / mat.log(richness)
        else:
            evenness = 0

        sR.append(richness)
        totalAbund.append(total)
        index.append(h)
        eve.append(evenness)

# One panel per metric, all sharing the same vertical ordering of samples.
graph, (plot1, plot2, plot3, plot4) = plt.subplots(1, 4)
plot1.plot(index, depth, color='red', label='Species Diversity')
plot2.plot(eve, depth, color='blue', label='Eveness')
plot3.plot(sR, depth, color='green', label='Species Richness')
plot4.plot(totalAbund, depth, color='black', label='Abundance')
plot1.set_title('H')
plot2.set_title('Eveness')
plot3.set_title('S')
plot4.set_title('TotalAbund')
# Invert the y axis so the first row of the file is drawn at the top.
for panel in (plot1, plot2, plot3, plot4):
    panel.invert_yaxis()
graph.tight_layout()
plt.show()

Top

Python Code for Computing and Plotting Species/Abundance Distribution (uses the Turtle library to draw the plot)


#You can run this in PyCharm
#It did not work for me in Jupyter
import turtle as tr
# ---------------------------------------------------------------------------
# Draws a species/abundance chart with turtle graphics.
#
# The input is a tab-delimited text file: the first line is a header whose
# columns after the first are species names; every following line starts
# with a label (written under the "Depth" heading) followed by one integer
# abundance per species. Each abundance is drawn as a horizontal blue bar
# whose length in pixels equals the abundance.
#
# NOTE: Make sure your file does not have extra tabs.
# It is good practice to copy the data from the original spreadsheet, paste
# it into a new spreadsheet and then save that as tab-delimited text.
# ---------------------------------------------------------------------------

# Read everything up front so the file is closed before any drawing starts.
with open(r'C:\\Users\Emil\PycharmProjects\pythonTurtle\Data\Test02.txt', "r") as f:
    linesplit = f.readline().split("\t")
    data_lines = f.readlines()

tr.home()
tr.hideturtle()
tr.clear()
tr.penup()
tr.pencolor("black")
tr.delay(0)  # draw without animation delay

# --- Header row: one two-letter abbreviation per species column ------------
x = -600
y = 380
for name in linesplit[1:]:
    # NOTE: Subspecies names are not printed, only the binomial name is
    # abbreviated (first letter of the first two words).
    words = name.split()
    string_to_print = "".join(word[0] for word in words[:2])
    tr.penup()
    tr.goto(x, y)
    tr.write(string_to_print, True, align="left")
    x += 30  # columns are 30 px apart

# --- "Depth" column heading -------------------------------------------------
i = -650
j = 380
tr.penup()
tr.goto(i, j)
tr.pencolor("black")
tr.write("Depth", True, align="left")
j -= 30  # rows are 30 px apart
tr.pensize(3)

# --- Data rows: label in black, then one blue bar per species ---------------
for OneLine in data_lines:
    if not OneLine.strip():
        # Skip blank lines (typically a trailing newline at end of file).
        continue
    LN = OneLine.split("\t")

    tr.penup()
    tr.pencolor("black")
    tr.goto(-650, j)
    tr.write(LN[0], True, align="left")

    i = -600
    tr.pencolor("blue")
    for cell in LN[1:]:
        abundance = int(cell)
        tr.penup()
        # Bars are raised 7 px so they line up with the text of the row label.
        tr.goto(i, j + 7)
        tr.pensize(0 if abundance == 0 else 3)
        tr.pendown()
        tr.forward(abundance)
        i += 30
    j -= 30

# --- Legend heading below the chart -----------------------------------------
i = -650
j -= 30
tr.penup()
tr.goto(i, j)
tr.color("black")
tr.write("LEGEND:", True, align="left")
tr.mainloop()

Top

Python Code for Computing and Plotting Species/Abundance Distribution (uses the pyplot library for plotting)

   

# I run this in Jupyter
import pandas as pd
import numpy as np
from matplotlib.ticker import StrMethodFormatter
from pathlib import Path
from matplotlib.figure import Figure
import matplotlib.pyplot as plt
# ---------------------------------------------------------------------------
# Plots a species/abundance distribution chart: one horizontal bar chart per
# species, side by side, with the samples along the vertical axis.
#
# The input is a tab-delimited text file: the first line is a header whose
# columns after the first are species names; every following line starts
# with a sample label (stored in `depth`) followed by one numeric abundance
# per species.
# ---------------------------------------------------------------------------
depth = []      # first column of each data row, kept as text
species = []    # species names taken from the header row
num_data = []   # one list of float abundances per data row

# `with` guarantees the file is closed even if a row fails to parse.
with open(r'C:\\Users\Emil\PycharmProjects\pythonTurtle\Data\Test03BentForams.txt', "r") as f:
    linesplit = f.readline().split("\t")
    species = [name.strip() for name in linesplit[1:]]
    for OneLine in f:
        if not OneLine.strip():
            # Skip blank lines (typically a trailing newline at end of file).
            continue
        LN = OneLine.split("\t")
        depth.append(LN[0])
        # Convert to float here so the DataFrame really holds numbers.
        num_data.append([float(cell.strip()) for cell in LN[1:]])

# Number of header columns, i.e. species columns plus the label column.
no_of_species = len(linesplit)
panel_count = no_of_species - 1

# Rows and columns are labelled 1..n; column k holds species[k-1].
df = pd.DataFrame(
    num_data,
    index=range(1, len(num_data) + 1),
    columns=range(1, no_of_species),
    dtype=float,
)

fig = plt.gcf()
fig.set_size_inches(20, 12, forward=True)

for col in range(1, panel_count + 1):
    axes = plt.subplot(1, panel_count, col)
    # Hide the right spine so neighbouring panels read as one chart.
    axes.spines['right'].set_color('white')
    plt.xticks([])
    if col > 1:
        # Only the first panel keeps the sample labels on its y axis.
        plt.yticks([])
    plt.barh(depth, df[col])
    plt.title(species[col - 1], rotation=90, fontsize=8)
    # Invert the y axis so the first row of the file is drawn at the top.
    plt.gca().invert_yaxis()
    # Fixed x range so bar lengths are comparable between panels.
    plt.xlim(0, 10)

fn = Path('~/PycharmProjects/pythonTurtle/DistributionChartSVG.svg').expanduser()
plt.savefig(fn, bbox_inches='tight')

plt.show()

Top

Python Code for Computing Sample to Sample Similarity (one section)

  
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
# ---------------------------------------------------------------------------
# Computes the cosine similarity between every pair of rows (samples) of a
# numeric table and shows the resulting matrix as an annotated heat map.
#
# NOTE: Replace the path with the path to your file.
# The file should be tab-delimited text and consist of data only: no species
# name row and no depth (sample label) column.
# ---------------------------------------------------------------------------
# ndmin=2 keeps the array two-dimensional even when the file has a single
# row or a single column.
data1 = np.loadtxt(r'C:\\Users\Emil\PycharmProjects\VectorAlgebraPython\Data\U1319B_BenhticForams_dataonly.txt',
                   delimiter="\t", ndmin=2)
no_of_rows = data1.shape[0]
no_of_cols = data1.shape[1]

# cos(theta) between rows a and b is (a . b) / (|a| * |b|).
row_norms = np.sqrt(np.sum(data1 * data1, axis=1))
dot_products = data1 @ data1.T
norm_products = np.outer(row_norms, row_norms)

# NumPy division by zero yields NaN/inf rather than raising, so an all-zero
# row is handled explicitly: its similarity to every row is left at 0.0.
costheta = np.zeros_like(dot_products, dtype=float)
np.divide(dot_products, norm_products, out=costheta, where=norm_products != 0)
costheta = np.round(costheta, 2)

# Samples are labelled 1..n on both axes.
sim_list_rng = list(range(1, no_of_rows + 1))
df = pd.DataFrame(costheta, index=sim_list_rng, columns=sim_list_rng, dtype=float)
# print(df)  # uncomment to inspect the similarity matrix

plt.subplots(figsize=(20, 20))
sns.heatmap(df, cmap='YlOrRd', linewidths=0.30, annot=True)
# Needed when run as a plain script; harmless in Jupyter.
plt.show()

Top