Sunday, May 3, 2020

Playing with Virus data

Thanks
to the  Sage for the mathematical framework,
to the ECDC for the worldwide pandemic data,
to the China for the virus,

we can now play with the statistics and create some interesting graphs,
  • like the current number of days to double number of victims [should be high]
  • or the exponent of the current cases grows (if considered exponential) [should be low].
Copy the program, download the data (in CSV) and play .....


import csv
from datetime import date

# ------------------------------------------------------------------------------

date0 = date(2019, 12, 31)

# ------------------------------------------------------------------------------

# Functions defining graphs

# Cases / Deaths

def cases(v):
  return anything(v, 'cases')
  
def deaths(v):
  return anything(v, 'deaths')
 
def anything(v, c):
  v1 = {}
  #for x in v:
  #  v1[x] = v[x][c]
  for k in sorted(v.keys()):
      v1[k] = v[k][c]
  return v1
  
# Daily increase [%]
  
def delta_cases(v):
  return delta_any(v, 'cases')

def delta_deaths(v):
  return delta_any(v, 'deaths')
  
def delta_any(v, c):
  v1 = {}
  sum = 0
  for k in sorted(v.keys()):
    sum += v[k][c]
    if sum != 0:
      v1[k] = 100 * v[k][c] / sum
  return v1
  
# Number of days to double numbers 
  
def double_cases(v):
  return double_any(v, 'cases')
  
def double_deaths(v):
  return double_any(v, 'deaths')
   
def double_any(v, c):
  v1 = {}
  sum = 0
  for k in sorted(v.keys()):
    sum += v[k][c]
    if v[k][c] != 0:
      v1[k] = sum / v[k][c]
  return v1
 
# Exponet for exponential fit of latest days 
 
def exp_cases(v):
  return exp_any(v, 'cases')
 
def exp_deaths(v):
  return exp_any(v, 'deaths')
  
def exp_any(v, c):
  v1 = {}
  y0 = -1
  for k in sorted(v.keys()):
    if y0 > 0 and v[k][c] > 0:
      v1[k] = log(float(v[k][c])) - y0
    y0 = log(float(v[k][c]))
  return v1
 
# Mortality 
 
def mortality(v):
  v1 = {}
  sum = 0
  for k in sorted(v.keys()):
    sum += v[k]['deaths']
    if v[k]['cases'] != 0:
      v1[k] = 100 * v[k]['deaths'] / v[k]['cases']
  return v1
  
# Sigmoid fit  

def sigmoidFit(name, values):
  data = []
  sum = 0
  for v in values:
    sum += v[1]
    data += [(v[0], sum)]
  var('L, k, x0, x')
  model(x) = L / (1 + exp(- k * (x - x0)))
  sol = find_fit(data, model)
  print(sol)
  f(x) = model(L = sol[0].rhs(), k = sol[1].rhs(), x0 = sol[2].rhs())
  h = plot([])
  h += text(name,                                                                      ( 30, sol[0].rhs() / 2      ), fontsize = 10, color = 'black')
  h += text('today is ' + str(len(data) - round(sol[2].rhs())) + ' days after peak', (130, sol[0].rhs() / 2 * 0.9), fontsize = 10, color = 'black')
  h += plot(f(x), x, [0, len(data) * 2])
  h += list_plot(data, color = 'red')
  show(h)

# Reading and interpreting country data

def readCountry(country, f, normalised, integrated, offset, smear):
  val = {}
  with open('virus.csv', newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
      date1 = date(int(row['year']), int(row['month']), int(row['day']))
      days = (date1 - date0).days
      if row['countriesAndTerritories'] == country:
        v = {}
        if normalised:
          v['cases']  = float(row['cases'])  / float(row['popData2018'])
          v['deaths'] = float(row['deaths']) / float(row['popData2018'])
        else:
          v['cases']  = int(row['cases'])
          v['deaths'] = int(row['deaths'])
        val[int(days)] = v
  if integrated:
    valI = {}
    sumC = 0
    sumD = 0
    for k in sorted(val.keys()):
      sumC += val[k]['cases']
      sumD += val[k]['deaths']
      valI[k] = {'cases':sumC, 'deaths':sumD}
    val = valI
  if smear > 0:
    valS = {}
    for k in sorted(val.keys()):
      sumC = 0
      sumD = 0
      n = 0
      for i in [0..smear]:
        if k + i in val.keys():
          n += 1
          sumC += val[k + i]['cases']
          sumD += val[k + i]['deaths']
      valS[k] = {'cases':(sumC / n), 'deaths':(sumD / n)}
    val = valS
  if offset > 0:
    valO = {}
    d    = 0
    for k in sorted(val.keys()):
      d += 1
      if d > offset:
        valO[k] = val[k]
    val = valO
  return f(val)

# Creating histogram

def plotHisto(values, logarithmic, col, country, doSpline):
  data = []
  maxx = 0
  for val in values:
    data += [(val, values[val])]
    if val > maxx:
      maxx = val
  sc = 'linear'
  if logarithmic:
    sc = 'semilogy'
  p = list_plot(data, scale = sc, color = col)
  if (doSpline):
    s = spline(data)
    p += plot(s, 0, maxx,  scale = sc, color = col, legend_label = country)
  return p

# ------------------------------------------------------------------------------
       
# Define group of countries to analyse            
countries = [
             ('France',                   'red'),
             ('Italy',                    'green'),
             ('Germany',                  'grey'),
             ('Belgium',                  'pink'),
             ('Russia',                   'purple'),
             ('Spain',                    'orange'),
             ('Czechia',                  'blue'), 
             ('Sweden',                   'brown'), 
             ('Japan',                    'yellow'), 
             ('United_Kingdom',           'violet'), 
             ('United_States_of_America', 'black')
             ]
countries0 = [
              ('France',                   'red'),
              ('Italy',                    'green'),
              ('Czechia',                  'blue')
              ]
countriesF = [
              ('France',                   'red'),
              ]
       
# Set graphs to create       
# - for analytical graphs              
optionsList = [
               {'normalised':true,   'integrated':false, 'logarithmic':false, 'smear':5,  'offset':60, 'doSpline':true, 'countries':countries,  'f':[cases, deaths]},
               {'normalised':true,   'integrated':false, 'logarithmic':false, 'smear':0,  'offset':60, 'doSpline':true, 'countries':countries,  'f':[cases, deaths]},
               {'normalised':true,   'integrated':true,  'logarithmic':false, 'smear':0,  'offset':60, 'doSpline':true, 'countries':countries,  'f':[cases, deaths]},
               {'normalised':false,  'integrated':false, 'logarithmic':false, 'smear':0,  'offset':60, 'doSpline':true, 'countries':countries0, 'f':[cases, deaths]},
               {'normalised':false,  'integrated':true,  'logarithmic':false, 'smear':0,  'offset':60, 'doSpline':true, 'countries':countries0, 'f':[cases, deaths]},
               {'normalised':true,  'integrated':true,  'logarithmic':true,  'smear':5,  'offset':60, 'doSpline':true, 'countries':countries,  'f':[cases, deaths]},
               {'normalised':false, 'integrated':false, 'logarithmic':false, 'smear':5,  'offset':60, 'doSpline':true, 'countries':countries,  'f':[delta_cases, delta_deaths]},
               {'normalised':false, 'integrated':false, 'logarithmic':false, 'smear':5,  'offset':60, 'doSpline':true, 'countries':countries,  'f':[double_cases, double_deaths]},
               {'normalised':false, 'integrated':false, 'logarithmic':false, 'smear':5,  'offset':60, 'doSpline':true, 'countries':countries,  'f':[exp_cases, exp_deaths]},
               {'normalised':false, 'integrated':false, 'logarithmic':false, 'smear':10, 'offset':60, 'doSpline':true, 'countries':countries,  'f':[mortality]},
               ]
# - for sigmoid fit
countries4fit = countriesF
  
# ------------------------------------------------------------------------------
   
# Main loop for analytical graphs
for options in optionsList:
  normalised  = options['normalised']
  integrated  = options['integrated']
  logarithmic = options['logarithmic']
  smear       = options['smear']
  offset      = options['offset']
  doSpline    = options['doSpline']
  countries    = options['countries']
  for f in options['f']:
    if f == delta_cases or f == delta_deaths or f == double_cases or f == double_deaths:
      integrated = false
    if f == exp_cases or f == exp_deaths:
      integrated = true
      normalised = false
    #if integrated:
    #  offset = 0
    h = plot([])
    for (country, color) in countries:
      h += plotHisto(readCountry(country, f, normalised, integrated, offset, smear), logarithmic, color, country, doSpline)
    x = (3 * h.get_minmax_data()['xmin'] + h.get_minmax_data()['xmax']) / 4
    y = (3 * h.get_minmax_data()['ymin'] + h.get_minmax_data()['ymax']) / 4
    t = f.__name__
    if normalised:
      t += ",\nnormalised"
    if integrated:
      t += ",\nintegrated"
    if logarithmic:
      t += ",\nlogarithmic"
    if smear > 0:
      t += ",\nsmear = " + str(smear)
    h += text(t, (x, y), fontsize = 10, color = 'black')
    show(h)

# Main loop for sigmoid fit
for country in countries4fit:
  for h in [cases, deaths]:
    values = readCountry(country[0], h, false, false, 0, 0)
    a = []
    for v in values:
      a += [(v, values[v])] 
    sigmoidFit(country[0] + "\n" + h.__name__, a)

# ------------------------------------------------------------------------------

No comments:

Post a Comment