Browse Source

ready to be deployed

cjs3
Stephen Lorenz 3 years ago
parent
commit
d85d3c0e98
  1. 191
      bin/efm.R
  2. 2
      bin/lrmix.R
  3. 197
      bin/lrmix_multisampleUsage_v2.r
  4. 16
      cjs-data/init/input/reqbt-480-3.csv
  5. 16
      cjs-data/init/input/reqbt-480-4.csv
  6. 1
      cjs-data/init/input/reqbt-480-reqbt-480-3-Asian.json
  7. 1
      cjs-data/init/input/reqbt-480-reqbt-480-3-Black.json
  8. 1
      cjs-data/init/input/reqbt-480-reqbt-480-3-Caucasian.json
  9. 1
      cjs-data/init/input/reqbt-480-reqbt-480-3-Hispanic.json
  10. 1
      cjs-data/init/input/reqbt-480-reqbt-480-4-Asian.json
  11. 1
      cjs-data/init/input/reqbt-480-reqbt-480-4-Black.json
  12. 1
      cjs-data/init/input/reqbt-480-reqbt-480-4-Caucasian.json
  13. 1
      cjs-data/init/input/reqbt-480-reqbt-480-4-Hispanic.json
  14. 45
      cjs-data/init/input/reqbt-480.tsv
  15. 2
      cjs-data/init/output/qualitative_reqbt-480-3-Asian.csv
  16. 2
      cjs-data/init/output/qualitative_reqbt-480-3-Black.csv
  17. 2
      cjs-data/init/output/qualitative_reqbt-480-3-Caucasian.csv
  18. 2
      cjs-data/init/output/qualitative_reqbt-480-3-Hispanic.csv
  19. 2
      cjs-data/init/output/qualitative_reqbt-480-4-Asian.csv
  20. 2
      cjs-data/init/output/qualitative_reqbt-480-4-Black.csv
  21. 2
      cjs-data/init/output/qualitative_reqbt-480-4-Hispanic.csv
  22. 13
      cjs/cjs/convert.py
  23. 32
      cjs/cjs/core/batch.py
  24. 25907
      cjs/data/jfs2003id.json
  25. 6
      cjs/data/mailer.json

191
bin/efm.R

@ -1,191 +0,0 @@
#!/usr/bin/env Rscript
require(euroformix);# sessionInfo()
# setwd("C:\\Users\\oyvbl\\Dropbox\\Forensic\\euroformix0\\runMultipleSamples") #IMPORTANT TO SET YOUR WORKDIRECTORY GIVEN AS SAME FOLDER AS YOUR FILES/EVIDENCE-FOLDERS # cjs: stephen
#rm(list=ls())
#source("lrmix_multisampleUsage.R")
library(forensim)
################
#help functions#
################
readFreq <- function(file) { #import popfrequencies:
  # Read a comma-separated allele-frequency table.
  #   file  path to a CSV whose first column holds the allele labels and whose
  #         remaining columns each hold the frequencies for one locus.
  # Returns a list named by upper-cased locus name; each element is that
  # locus' non-missing frequencies, named by allele.
  freq_table <- read.table(file, header = TRUE, sep = ",")
  locs <- toupper(colnames(freq_table[-1]))
  popFreq <- list()
  # seq_along() instead of 1:length(): a table without any locus column now
  # yields an empty list rather than failing on a non-existent column.
  for (i in seq_along(locs)) {
    observed <- !is.na(freq_table[, i + 1])
    popFreq[[i]] <- freq_table[observed, i + 1]
    names(popFreq[[i]]) <- freq_table[observed, 1]
  }
  names(popFreq) <- locs
  return(popFreq)
}
tableReader <- function(filename) {
  # Read a delimited text table, guessing the separator.
  # Tab is tried first; while the result still has a single column, comma and
  # then semicolon are tried with read errors ignored. A last semicolon read
  # is made without a guard so that its error, if any, reaches the caller.
  # Returns a data frame (strings are kept as character so allele names stay
  # intact).
  read_with <- function(delim) {
    read.table(filename, header = TRUE, sep = delim, stringsAsFactors = FALSE)
  }
  tab <- read_with("\t")
  for (delim in c(",", ";")) {
    if (ncol(tab) != 1) break
    parsed <- tryCatch(read_with(delim), error = function(e) NULL)
    if (!is.null(parsed)) tab <- parsed
  }
  if (ncol(tab) == 1) tab <- read_with(";")
  tab
}
sample_tableToList = function(X,threshT=NULL) {
  # Convert a profile table into a nested list Y[[sample]][[LOCUS]] holding
  # $adata (alleles, as character) and, when height columns exist, $hdata
  # (peak heights, numeric).
  #   X        data frame; columns are located by case-insensitive substring
  #            match: "marker" (fallback "loc"), "sample" (narrowed to the one
  #            containing "name" if several match), "allele", "height".
  #   threshT  optional detection threshold; only applied when height columns
  #            exist, keeping alleles whose peak height is >= threshT.
  cn = tolower(colnames(X)) #colnames
  lind = grep("marker",cn,fixed=TRUE) #locus col-ind
  if(length(lind)==0) lind = grep("loc",cn,fixed=TRUE) #try another name
  sind = grep("sample",cn,fixed=TRUE) #sample col-ind
  if(length(sind)>1) sind = sind[grep("name",cn[sind],fixed=TRUE)] #use only sample name
  A_ind = grep("allele",cn,fixed=TRUE) #allele col-ind
  H_ind = grep("height",cn,fixed=TRUE) #height col-ind
  ln = unique(toupper(X[,lind])) #locus names: Convert to upper case
  sn = unique(as.character(X[,sind])) #sample names
  Y = list() #insert non-empty characters:
  for(k in seq_along(sn)) { #for each sample in matrix (no pass at all for an empty table)
    Y[[sn[k]]] = list() #one list for each sample
    for(i in seq_along(ln)) { #for each locus
      xind = X[,sind]==sn[k] & toupper(X[,lind])==ln[i] #get index in X for given sample and locus
      if(sum(xind)==0) next
      # positions (within the allele columns) of the non-empty alleles
      keep <- which(!is.na(X[xind,A_ind]) & X[xind,A_ind]!="")
      if(length(H_ind)>0) { #If peak heights are considered
        PH <- as.numeric(as.character(X[xind,H_ind][keep])) #peak heights of the kept alleles
        if(!is.null(threshT)) {
          # PH is already restricted to `keep`, so the threshold hits are
          # positions within `keep`; map them back before indexing the
          # allele columns, otherwise a blank allele ahead of a kept one
          # shifts every selection.
          above <- which(PH>=threshT)
          PH <- PH[above]
          keep <- keep[above]
        }
        Y[[sn[k]]][[ln[i]]]$hdata = PH
      }
      if(length(A_ind)>0) {
        Y[[sn[k]]][[ln[i]]]$adata = as.character(X[xind,A_ind][keep])
      }
    }
  }
  names(Y) <- sn
  return(Y)
}
getData <- function(mixData2,refData2,popFreq) { #Helpfunction to get data to analyse
  # Restrict the evidence and reference profiles to the loci named in popFreq,
  # pass them through Qassignate() (stutter and reference inclusion switched
  # off via incS/incR = FALSE), and return list(samples, refData, popFreq).
  loci <- names(popFreq)
  evidence <- lapply(mixData2, function(profile) profile[loci]) #selected loci only
  refs_by_locus <- list()
  for (locus in loci) {
    # per locus: the allele vector of every reference profile
    refs_by_locus[[locus]] <- lapply(refData2, function(profile) profile[[locus]]$adata)
  }
  assigned <- Qassignate(samples=evidence, popFreq, refs_by_locus, incS=FALSE, incR=FALSE)
  list(samples=evidence, refData=assigned$refData, popFreq=assigned$popFreq)
}
calcLR <- function(pD) {
  # Likelihood ratio for one drop-out probability pD: the product over all
  # loci in dat$popFreq of likEvid() under Hp divided by likEvid() under Hd.
  # Reads the script-level variables dat, nC, fst and pC.
  drop_het <- rep(pD, nC)
  drop_hom <- drop_het^2
  ratio <- 1
  for (locus in names(dat$popFreq)) {
    # Build the evidence vector: the alleles of each sample in turn, with a 0
    # between consecutive samples and a 0 standing in for an empty sample.
    evid <- NULL
    for (rep_idx in 1:length(dat$samples)) {
      alleles <- dat$samples[[rep_idx]][[locus]]$adata
      if (length(alleles) == 0) alleles <- 0
      evid <- if (rep_idx > 1) c(evid, 0, alleles) else c(evid, alleles)
    }
    ref_alleles <- unlist(dat$refData[[locus]])
    freqs <- dat$popFreq[[locus]]
    # Hp: reference passed as T with nC-1 unknowns; Hd: reference passed as V with nC unknowns
    lik_hp <- likEvid(evid, T=ref_alleles, V=NULL, x=nC-1, theta=fst, prDHet=drop_het, prDHom=drop_hom, prC=pC, freq=freqs)
    lik_hd <- likEvid(evid, T=NULL, V=ref_alleles, x=nC, theta=fst, prDHet=drop_het, prDHom=drop_hom, prC=pC, freq=freqs)
    ratio <- ratio * lik_hp / lik_hd
  }
  ratio
}
###################################################################
#SCRIPT STARTS HERE:
library("rjson") # cjs: stephen
# Usage: Rscript efm.R <job.json>
# The job file supplies working_dir, resource_dir, output_dir, evidence_file,
# comparison_file, population, test_name, evidence_name, num_contributors and
# num_replicates; model settings come from <resource_dir>/settings.json.
args <- commandArgs(trailingOnly=TRUE) # cjs: stephen
if (length(args) < 1) stop("Usage: efm.R <job.json>")
testFile <- fromJSON(file=args[1]) # cjs: stephen
settings <- fromJSON(file=paste0(testFile$resource_dir, "/settings.json")) # cjs: stephen
workingDir <- testFile$working_dir # cjs: stephen
setwd(workingDir) # cjs: stephen

#get popfreq file (the allele frequency file):
databaseFile <- paste0(testFile$resource_dir, "/", "Identifiler_", testFile$population, ".csv") # cjs: stephen
popFreq <- readFreq(databaseFile) #import population freqs

#get references:
refFile <- testFile$comparison_file #the file including references # cjs: stephen
refData <- sample_tableToList(tableReader(refFile)) #load references
rN <- names(refData) #names of references

#Model setup:
kit <- settings$kit
threshT <- settings$threshT #detection threshold (rfu) # cjs: stephen
fst <- settings$fst # cjs: stephen
nC <- testFile$num_contributors #assumed number of contributors # cjs: stephen
dropin <- settings$dropin #consider drop-in model? # cjs: stephen
pC <- 0
if (dropin) {
  # drop-in probability depends on the number of replicates
  if (testFile$num_replicates == 2) {
    pC <- 0.02
  } else if (testFile$num_replicates == 3) {
    pC <- 0.035
  } else {
    stop('Bad number of replicates')
  } # cjs: stephen
}

#Outfile to store results
outf <- paste0(testFile$output_dir, "/qualitative_", testFile$test_name, "-", testFile$population, ".csv") # cjs: stephen
# NOTE(review): the third column is labelled "log10LR" but the value written
# below is the untransformed LR -- confirm which one consumers expect.
cn <- c("EvidFile","POI","log10LR")
out <- matrix(nrow=0,ncol=length(cn))
colnames(out) <- cn

begin <- Sys.time() #start timer
evidfile <- testFile$evidence_file # cjs: stephen
mixData <- sample_tableToList(X=tableReader(evidfile), threshT=threshT) #NOTICE THAT THE PEAK HEIGHT THRESHOLD IS GIVEN AS ARGUMENT

for (j in seq_along(rN)) { #for each reference (none at all when the file is empty)
  refData2 <- refData[j] #consider only ref "j" as POI
  dat <- getData(mixData, refData2, popFreq) #process data (NOTICE: NOT INCLUDING STUTTERS)
  nS <- length(dat$samples) #number of samples

  #Perform calculations
  set.seed(1)
  # number of alleles per sample; vapply keeps the result numeric even when a
  # sample has no loci
  totAv <- vapply(dat$samples,
                  function(x) sum(vapply(x, function(y) length(y$adata), integer(1))),
                  numeric(1))
  refData3 <- list()
  for (loc in names(dat$popFreq)) refData3[[loc]] <- lapply(refData2, function(x) x[[loc]]$adata) #get format for simDOdistr
  dropqq <- c(0.05,0.95) #quantiles to estimate
  diMd <- diMp <- rep(0, length(dropqq))
  niter <- 1e4 #required number of samples
  for (s in seq_len(nS)) { #for each sample
    dihd <- simDOdistr(totA=totAv[s], nC=nC, popFreq, refData=NULL, minS=niter, prC=pC, M=2000) #model under Hd
    dihp <- simDOdistr(totA=totAv[s], nC=nC, popFreq, refData=refData3, minS=niter, prC=pC, M=2000) #model under Hp
    # Average the quantiles over the samples. Previously diMd/diMp were
    # overwritten on every pass, leaving only the last sample's quantiles
    # divided by nS.
    diMd <- diMd + quantile(dihd, dropqq)/nS
    diMp <- diMp + quantile(dihp, dropqq)/nS
  }
  div <- c(diMp, diMd)
  LRmc <- Vectorize(calcLR)(div)
  LR <- LRmc[which.min(LRmc)] #get conservative LR in LRmix

  # update out object
  out <- rbind(out, c(testFile$evidence_name, rN[j], LR))
  # Export overall results (rewritten after every reference so partial
  # results survive a later failure)
  write.table(out, file=outf, row.names=FALSE)
}
end <- Sys.time() #end timer
# Ask for minutes explicitly: difftime() otherwise picks its own unit, which
# made the "min" suffix wrong for short or very long runs.
runtime <- difftime(end, begin, units="mins")
paste("Time taken: ", sprintf("%.2fmin", as.numeric(runtime)))

2
bin/lrmix.R

@ -103,7 +103,7 @@ workingDir = testFile$working_dir # cjs: stephen
setwd(workingDir) # cjs: stephen
#get popfreq file:
databaseFile = paste0(testFile$resource_dir, "/", "Identifiler_", testFile$population, ".csv") # opt$database # cjs: stephen
databaseFile = paste0(testFile$resource_dir, "/ocme_population/", "Identifiler_", testFile$population, ".csv") # opt$database # cjs: stephen
#The allele frequency file
popFreq <- readFreq(databaseFile) #import population freqs
#names(popFreq) #loci to consider

197
bin/lrmix_multisampleUsage_v2.r

@ -1,197 +0,0 @@
#!/usr/bin/env Rscript
# Attach the modelling packages. The first statement used to read
# "equire(euroformix)", which stopped the script with "could not find
# function"; library() also fails fast if the package is not installed.
# euroformix is attached before forensim, as in the original order.
library(euroformix); sessionInfo()
#setwd("C:\\Users\\oyvbl\\Dropbox\\Forensic\\LRmixStudio")
#rm(list=ls())
#source("lrmix_multisampleUsage.R")
library(forensim);library(euroformix)
################
#help functions#
################
readFreq <- function(file) { #import popfrequencies:
  # Read a comma-separated allele-frequency table.
  #   file  path to a CSV whose first column holds the allele labels and whose
  #         remaining columns each hold the frequencies for one locus.
  # Returns a list named by upper-cased locus name; each element is that
  # locus' non-missing frequencies, named by allele.
  freq_table <- read.table(file, header = TRUE, sep = ",")
  locs <- toupper(colnames(freq_table[-1]))
  popFreq <- list()
  # seq_along() instead of 1:length(): a table without any locus column now
  # yields an empty list rather than failing on a non-existent column.
  for (i in seq_along(locs)) {
    observed <- !is.na(freq_table[, i + 1])
    popFreq[[i]] <- freq_table[observed, i + 1]
    names(popFreq[[i]]) <- freq_table[observed, 1]
  }
  names(popFreq) <- locs
  return(popFreq)
}
tableReader2 <- function(filename) {
  # Read a delimited text table, guessing the separator; row names are never
  # taken from the file (row.names = NULL).
  # Tab is tried first; while the result still has a single column, comma and
  # then semicolon are tried with read errors ignored. A last semicolon read
  # is made without a guard so that its error, if any, reaches the caller.
  # Returns a data frame (strings are kept as character so allele names stay
  # intact).
  read_with <- function(delim) {
    read.table(filename, header = TRUE, sep = delim,
               stringsAsFactors = FALSE, row.names = NULL)
  }
  tab <- read_with("\t")
  for (delim in c(",", ";")) {
    if (ncol(tab) != 1) break
    parsed <- tryCatch(read_with(delim), error = function(e) NULL)
    if (!is.null(parsed)) tab <- parsed
  }
  if (ncol(tab) == 1) tab <- read_with(";")
  tab
}
sample_tableToList = function(X,threshT=NULL) {
  # Convert a profile table into a nested list Y[[sample]][[LOCUS]] holding
  # $adata (alleles, as character) and, when height columns exist, $hdata
  # (peak heights, numeric).
  #   X        data frame; columns are located by case-insensitive substring
  #            match: "marker" (fallback "loc"), "sample" (narrowed to the one
  #            containing "name" if several match), "allele", "height".
  #   threshT  optional detection threshold; only applied when height columns
  #            exist, keeping alleles whose peak height is >= threshT.
  cn = tolower(colnames(X)) #colnames
  lind = grep("marker",cn,fixed=TRUE) #locus col-ind
  if(length(lind)==0) lind = grep("loc",cn,fixed=TRUE) #try another name
  sind = grep("sample",cn,fixed=TRUE) #sample col-ind
  if(length(sind)>1) sind = sind[grep("name",cn[sind],fixed=TRUE)] #use only sample name
  A_ind = grep("allele",cn,fixed=TRUE) #allele col-ind
  H_ind = grep("height",cn,fixed=TRUE) #height col-ind
  ln = unique(toupper(X[,lind])) #locus names: Convert to upper case
  sn = unique(as.character(X[,sind])) #sample names
  Y = list() #insert non-empty characters:
  for(k in seq_along(sn)) { #for each sample in matrix (no pass at all for an empty table)
    Y[[sn[k]]] = list() #one list for each sample
    for(i in seq_along(ln)) { #for each locus
      xind = X[,sind]==sn[k] & toupper(X[,lind])==ln[i] #get index in X for given sample and locus
      if(sum(xind)==0) next
      # positions (within the allele columns) of the non-empty alleles
      keep <- which(!is.na(X[xind,A_ind]) & X[xind,A_ind]!="")
      if(length(H_ind)>0) { #If peak heights are considered
        PH <- as.numeric(as.character(X[xind,H_ind][keep])) #peak heights of the kept alleles
        if(!is.null(threshT)) {
          # PH is already restricted to `keep`, so the threshold hits are
          # positions within `keep`; map them back before indexing the
          # allele columns, otherwise a blank allele ahead of a kept one
          # shifts every selection.
          above <- which(PH>=threshT)
          PH <- PH[above]
          keep <- keep[above]
        }
        Y[[sn[k]]][[ln[i]]]$hdata = PH
      }
      if(length(A_ind)>0) {
        Y[[sn[k]]][[ln[i]]]$adata = as.character(X[xind,A_ind][keep])
      }
    }
  }
  names(Y) <- sn
  return(Y)
}
getData <- function(mixData2,refData2,popFreq) { #Helpfunction to get data to analyse
  # Restrict the evidence and reference profiles to the loci named in popFreq,
  # pass them through Qassignate() (stutter and reference inclusion switched
  # off via incS/incR = FALSE), and return list(samples, refData, popFreq).
  loci <- names(popFreq)
  evidence <- lapply(mixData2, function(profile) profile[loci]) #selected loci only
  refs_by_locus <- list()
  for (locus in loci) {
    # per locus: the allele vector of every reference profile
    refs_by_locus[[locus]] <- lapply(refData2, function(profile) profile[[locus]]$adata)
  }
  assigned <- Qassignate(samples=evidence, popFreq, refs_by_locus, incS=FALSE, incR=FALSE)
  list(samples=evidence, refData=assigned$refData, popFreq=assigned$popFreq)
}
calcLR <- function(pD) {
  # Likelihood ratio for one drop-out probability pD: the product over all
  # loci in dat$popFreq of likEvid() under Hp divided by likEvid() under Hd.
  # Reads the script-level variables dat, nC, fst and pC.
  drop_het <- rep(pD, nC)
  drop_hom <- drop_het^2
  ratio <- 1
  for (locus in names(dat$popFreq)) {
    # Build the evidence vector: the alleles of each sample in turn, with a 0
    # between consecutive samples and a 0 standing in for an empty sample.
    evid <- NULL
    for (rep_idx in 1:length(dat$samples)) {
      alleles <- dat$samples[[rep_idx]][[locus]]$adata
      if (length(alleles) == 0) alleles <- 0
      evid <- if (rep_idx > 1) c(evid, 0, alleles) else c(evid, alleles)
    }
    ref_alleles <- unlist(dat$refData[[locus]])
    freqs <- dat$popFreq[[locus]]
    # Hp: reference passed as T with nC-1 unknowns; Hd: reference passed as V with nC unknowns
    lik_hp <- likEvid(evid, T=ref_alleles, V=NULL, x=nC-1, theta=fst, prDHet=drop_het, prDHom=drop_hom, prC=pC, freq=freqs)
    lik_hd <- likEvid(evid, T=NULL, V=ref_alleles, x=nC, theta=fst, prDHet=drop_het, prDHom=drop_hom, prC=pC, freq=freqs)
    ratio <- ratio * lik_hp / lik_hd
  }
  ratio
}
###################################################################
#SCRIPT STARTS HERE:
library("rjson") # cjs: stephen
# Usage: Rscript lrmix_multisampleUsage_v2.r <job.json>
# The job file supplies working_dir, resource_dir, output_dir, evidence_file,
# comparison_file, population, test_name, evidence_name, num_contributors and
# num_replicates; model settings come from <resource_dir>/settings.json.
args <- commandArgs(trailingOnly=TRUE) # cjs: stephen
if (length(args) < 1) stop("Usage: lrmix_multisampleUsage_v2.r <job.json>")
testFile <- fromJSON(file=args[1]) # cjs: stephen
settings <- fromJSON(file=paste0(testFile$resource_dir, "/settings.json")) # cjs: stephen
workingDir <- testFile$working_dir # cjs: stephen
setwd(workingDir) # cjs: stephen

#get popfreq file (the allele frequency file):
databaseFile <- paste0(testFile$resource_dir, "/", "Identifiler_", testFile$population, ".csv") # cjs: stephen
popFreq <- readFreq(databaseFile) #import population freqs

#get references:
refFile <- testFile$comparison_file #the file including references # cjs: stephen
refData <- sample_tableToList(tableReader2(refFile)) #load references
rN <- names(refData) #names of references

#Model setup:
kit <- settings$kit
threshT <- settings$threshT #detection threshold (rfu) # cjs: stephen
fst <- settings$fst # cjs: stephen
nC <- testFile$num_contributors #assumed number of contributors # cjs: stephen
dropin <- settings$dropin #consider drop-in model? # cjs: stephen
pC <- 0
if (dropin) {
  # drop-in probability depends on the number of replicates
  if (testFile$num_replicates == 2) {
    pC <- 0.02
  } else if (testFile$num_replicates == 3) {
    pC <- 0.035
  } else {
    stop('Bad number of replicates')
  } # cjs: stephen
}

#Outfile to store results
outf <- paste0(testFile$output_dir, "/qualitative_", testFile$test_name, "-", testFile$population, ".csv") # cjs: stephen
cn <- c("EvidFile","POI","LR","Dropout")
out <- matrix(nrow=0,ncol=length(cn))
colnames(out) <- cn

begin <- Sys.time() #start timer
evidfile <- testFile$evidence_file # cjs: stephen
mixData <- sample_tableToList(X=tableReader2(evidfile), threshT=threshT) #NOTICE THAT THE PEAK HEIGHT THRESHOLD IS GIVEN AS ARGUMENT

for (j in seq_along(rN)) { #for each reference (none at all when the file is empty)
  refData2 <- refData[j] #consider only ref "j" as POI
  dat <- getData(mixData, refData2, popFreq) #process data (NOTICE: NOT INCLUDING STUTTERS)

  #Perform calculations
  set.seed(1)
  # number of alleles per sample; vapply keeps the result numeric even when a
  # sample has no loci
  totAv <- vapply(dat$samples,
                  function(x) sum(vapply(x, function(y) length(y$adata), integer(1))),
                  numeric(1))
  refData3 <- list()
  for (loc in names(dat$popFreq)) refData3[[loc]] <- lapply(refData2, function(x) x[[loc]]$adata) #get format for simDOdistr
  dropqq <- c(0.05,0.95) #quantiles to estimate
  totA <- floor(mean(totAv)) #mean allele count over samples, rounded down (per the original note: as LRmix Studio does)
  niter <- 1e4 #required number of samples

  #PERFORM MC simulations:
  dihp <- simDOdistr(totA=totA, nC=nC, popFreq, refData=refData3, minS=niter, prC=pC, M=2000) #model under Hp
  tmpHp <- quantile(dihp, dropqq)
  print("Hp quantiles"); print(tmpHp)
  dihd <- simDOdistr(totA=totA, nC=nC, popFreq, refData=NULL, minS=niter, prC=pC, M=2000) #model under Hd
  tmpHd <- quantile(dihd, dropqq)
  print("Hd quantiles"); print(tmpHd)

  div <- c(tmpHp, tmpHd)
  LRmc <- Vectorize(calcLR)(div)
  dropind <- which.min(LRmc)
  dhat <- div[dropind] #drop-out value giving the smallest LR
  LR <- LRmc[dropind] #get conservative LR in LRmix

  # update out object
  out <- rbind(out, c(testFile$evidence_name, rN[j], LR, signif(dhat,3)))
  # Export overall results (rewritten after every reference so partial
  # results survive a later failure)
  write.table(out, file=outf, row.names=FALSE)
}
# (A second, unmatched closing brace used to follow the loop and made the
# whole file unparseable; it has been removed.)
end <- Sys.time() #end timer
# Ask for minutes explicitly: difftime() otherwise picks its own unit, which
# made the "min" suffix wrong for short or very long runs.
runtime <- difftime(end, begin, units="mins")
paste("Time taken: ", sprintf("%.2fmin", as.numeric(runtime)))

16
cjs-data/init/input/reqbt-480-3.csv

@ -1,16 +0,0 @@
SampleName,Marker,Allele1,Allele2
reqbt-480-3,D19S433,14,14
reqbt-480-3,D21S11,28,30.2
reqbt-480-3,D18S51,13,15
reqbt-480-3,D16S539,11,13
reqbt-480-3,TH01,8,9.3
reqbt-480-3,vWA,15,18
reqbt-480-3,FGA,20,23
reqbt-480-3,D5S818,12,13
reqbt-480-3,TPOX,8,8
reqbt-480-3,D7S820,8,10
reqbt-480-3,D3S1358,15,15
reqbt-480-3,D2S1338,20,25
reqbt-480-3,D13S317,12,13
reqbt-480-3,D8S1179,14,14
reqbt-480-3,CSF1PO,12,12

16
cjs-data/init/input/reqbt-480-4.csv

@ -1,16 +0,0 @@
SampleName,Marker,Allele1,Allele2
reqbt-480-4,D19S433,13,15
reqbt-480-4,D21S11,31,31
reqbt-480-4,D18S51,12,19
reqbt-480-4,D16S539,11,11
reqbt-480-4,TH01,8,9.3
reqbt-480-4,vWA,16,17
reqbt-480-4,FGA,21,24
reqbt-480-4,D5S818,11,12
reqbt-480-4,TPOX,8,10
reqbt-480-4,D7S820,10,12
reqbt-480-4,D3S1358,16,16
reqbt-480-4,D2S1338,23,25
reqbt-480-4,D13S317,9,12
reqbt-480-4,D8S1179,11,14
reqbt-480-4,CSF1PO,9,11

1
cjs-data/init/input/reqbt-480-reqbt-480-3-Asian.json

@ -1 +0,0 @@
{"working_dir": "/home/csguest/Desktop/cjs3/cjs-data", "output_dir": "/home/csguest/Desktop/cjs3/cjs-data/init/output", "resource_dir": "/home/csguest/Desktop/cjs3/cjs/data", "evidence_file": "/home/csguest/Desktop/cjs3/cjs-data/init/input/reqbt-480.tsv", "comparison_file": "/home/csguest/Desktop/cjs3/cjs-data/init/input/reqbt-480-3.csv", "population": "Asian", "test_name": "reqbt-480-3", "evidence_name": "reqbt-480", "comparison_name": "reqbt-480-3", "num_contributors": 3, "num_replicates": 3}

1
cjs-data/init/input/reqbt-480-reqbt-480-3-Black.json

@ -1 +0,0 @@
{"working_dir": "/home/csguest/Desktop/cjs3/cjs-data", "output_dir": "/home/csguest/Desktop/cjs3/cjs-data/init/output", "resource_dir": "/home/csguest/Desktop/cjs3/cjs/data", "evidence_file": "/home/csguest/Desktop/cjs3/cjs-data/init/input/reqbt-480.tsv", "comparison_file": "/home/csguest/Desktop/cjs3/cjs-data/init/input/reqbt-480-3.csv", "population": "Black", "test_name": "reqbt-480-3", "evidence_name": "reqbt-480", "comparison_name": "reqbt-480-3", "num_contributors": 3, "num_replicates": 3}

1
cjs-data/init/input/reqbt-480-reqbt-480-3-Caucasian.json

@ -1 +0,0 @@
{"working_dir": "/home/csguest/Desktop/cjs3/cjs-data", "output_dir": "/home/csguest/Desktop/cjs3/cjs-data/init/output", "resource_dir": "/home/csguest/Desktop/cjs3/cjs/data", "evidence_file": "/home/csguest/Desktop/cjs3/cjs-data/init/input/reqbt-480.tsv", "comparison_file": "/home/csguest/Desktop/cjs3/cjs-data/init/input/reqbt-480-3.csv", "population": "Caucasian", "test_name": "reqbt-480-3", "evidence_name": "reqbt-480", "comparison_name": "reqbt-480-3", "num_contributors": 3, "num_replicates": 3}

1
cjs-data/init/input/reqbt-480-reqbt-480-3-Hispanic.json

@ -1 +0,0 @@
{"working_dir": "/home/csguest/Desktop/cjs3/cjs-data", "output_dir": "/home/csguest/Desktop/cjs3/cjs-data/init/output", "resource_dir": "/home/csguest/Desktop/cjs3/cjs/data", "evidence_file": "/home/csguest/Desktop/cjs3/cjs-data/init/input/reqbt-480.tsv", "comparison_file": "/home/csguest/Desktop/cjs3/cjs-data/init/input/reqbt-480-3.csv", "population": "Hispanic", "test_name": "reqbt-480-3", "evidence_name": "reqbt-480", "comparison_name": "reqbt-480-3", "num_contributors": 3, "num_replicates": 3}

1
cjs-data/init/input/reqbt-480-reqbt-480-4-Asian.json

@ -1 +0,0 @@
{"working_dir": "/home/csguest/Desktop/cjs3/cjs-data", "output_dir": "/home/csguest/Desktop/cjs3/cjs-data/init/output", "resource_dir": "/home/csguest/Desktop/cjs3/cjs/data", "evidence_file": "/home/csguest/Desktop/cjs3/cjs-data/init/input/reqbt-480.tsv", "comparison_file": "/home/csguest/Desktop/cjs3/cjs-data/init/input/reqbt-480-4.csv", "population": "Asian", "test_name": "reqbt-480-4", "evidence_name": "reqbt-480", "comparison_name": "reqbt-480-4", "num_contributors": 3, "num_replicates": 3}

1
cjs-data/init/input/reqbt-480-reqbt-480-4-Black.json

@ -1 +0,0 @@
{"working_dir": "/home/csguest/Desktop/cjs3/cjs-data", "output_dir": "/home/csguest/Desktop/cjs3/cjs-data/init/output", "resource_dir": "/home/csguest/Desktop/cjs3/cjs/data", "evidence_file": "/home/csguest/Desktop/cjs3/cjs-data/init/input/reqbt-480.tsv", "comparison_file": "/home/csguest/Desktop/cjs3/cjs-data/init/input/reqbt-480-4.csv", "population": "Black", "test_name": "reqbt-480-4", "evidence_name": "reqbt-480", "comparison_name": "reqbt-480-4", "num_contributors": 3, "num_replicates": 3}

1
cjs-data/init/input/reqbt-480-reqbt-480-4-Caucasian.json

@ -1 +0,0 @@
{"working_dir": "/home/csguest/Desktop/cjs3/cjs-data", "output_dir": "/home/csguest/Desktop/cjs3/cjs-data/init/output", "resource_dir": "/home/csguest/Desktop/cjs3/cjs/data", "evidence_file": "/home/csguest/Desktop/cjs3/cjs-data/init/input/reqbt-480.tsv", "comparison_file": "/home/csguest/Desktop/cjs3/cjs-data/init/input/reqbt-480-4.csv", "population": "Caucasian", "test_name": "reqbt-480-4", "evidence_name": "reqbt-480", "comparison_name": "reqbt-480-4", "num_contributors": 3, "num_replicates": 3}

1
cjs-data/init/input/reqbt-480-reqbt-480-4-Hispanic.json

@ -1 +0,0 @@
{"working_dir": "/home/csguest/Desktop/cjs3/cjs-data", "output_dir": "/home/csguest/Desktop/cjs3/cjs-data/init/output", "resource_dir": "/home/csguest/Desktop/cjs3/cjs/data", "evidence_file": "/home/csguest/Desktop/cjs3/cjs-data/init/input/reqbt-480.tsv", "comparison_file": "/home/csguest/Desktop/cjs3/cjs-data/init/input/reqbt-480-4.csv", "population": "Hispanic", "test_name": "reqbt-480-4", "evidence_name": "reqbt-480", "comparison_name": "reqbt-480-4", "num_contributors": 3, "num_replicates": 3}

45
cjs-data/init/input/reqbt-480.tsv

@ -1,45 +0,0 @@
Sample Name Marker Allele 1 Allele 2 Allele 3 Allele 4 Allele 5 Allele 6 ADO UD1
reqbt-480_1 D19S433 12 13 14 false
reqbt-480_1 D21S11 30.2 31 false
reqbt-480_1 D18S51 12 13 false
reqbt-480_1 D16S539 11 13 false
reqbt-480_1 TH01 6 8 9.3 false
reqbt-480_1 vWA 15 16 17 18 false
reqbt-480_1 FGA 20 21 22 23 24 25 false
reqbt-480_1 D5S818 10 11 12 false
reqbt-480_1 TPOX 8 9 10 false
reqbt-480_1 D7S820 8 10 12 false
reqbt-480_1 D3S1358 15 16 false
reqbt-480_1 D2S1338 25 false
reqbt-480_1 D13S317 9 12 13 false
reqbt-480_1 D8S1179 11 13 14 15 16 false
reqbt-480_1 CSF1PO 9 10 12 false
reqbt-480_2 D19S433 12 13 14 15 false
reqbt-480_2 D21S11 29 30.2 31 false
reqbt-480_2 D18S51 12 14 15 16 false
reqbt-480_2 D16S539 11 false
reqbt-480_2 TH01 8 9.3 false
reqbt-480_2 vWA 16 17 18 false
reqbt-480_2 FGA 22 23 false
reqbt-480_2 D5S818 11 12 13 false
reqbt-480_2 TPOX 8 10 false
reqbt-480_2 D7S820 11 false
reqbt-480_2 D3S1358 15 16 18 false
reqbt-480_2 D2S1338 25 false
reqbt-480_2 D13S317 8 10 12 false
reqbt-480_2 D8S1179 11 13 14 false
reqbt-480_3 D19S433 12 13 14 15 false
reqbt-480_3 D21S11 29 30 30.2 31 false
reqbt-480_3 D18S51 12 15 false
reqbt-480_3 D16S539 11 13 false
reqbt-480_3 TH01 6 7 8 9 9.3 false
reqbt-480_3 vWA 15 16 17 18 false
reqbt-480_3 FGA 21 22 23 24 false
reqbt-480_3 D5S818 9 11 12 13 false
reqbt-480_3 TPOX 8 false
reqbt-480_3 D7S820 10 12 false
reqbt-480_3 D3S1358 15 16 17 false
reqbt-480_3 D2S1338 25 false
reqbt-480_3 D13S317 9 10 12 13 false
reqbt-480_3 D8S1179 11 13 14 16 false
reqbt-480_3 CSF1PO 9 false

2
cjs-data/init/output/qualitative_reqbt-480-3-Asian.csv

@ -1,2 +0,0 @@
"EvidFile" "POI" "LR" "Dropout" "Race"
"reqbt-480" "reqbt-480-3" "2201226.61138267" "0.613" "Asian"

2
cjs-data/init/output/qualitative_reqbt-480-3-Black.csv

@ -1,2 +0,0 @@
"EvidFile" "POI" "LR" "Dropout" "Race"
"reqbt-480" "reqbt-480-3" "22017.3065409975" "0.617" "Black"

2
cjs-data/init/output/qualitative_reqbt-480-3-Caucasian.csv

@ -1,2 +0,0 @@
"EvidFile" "POI" "LR" "Dropout" "Race"
"reqbt-480" "reqbt-480-3" "5419.55296651941" "0.614" "Caucasian"

2
cjs-data/init/output/qualitative_reqbt-480-3-Hispanic.csv

@ -1,2 +0,0 @@
"EvidFile" "POI" "LR" "Dropout" "Race"
"reqbt-480" "reqbt-480-3" "20343.1454234912" "0.617" "Hispanic"

2
cjs-data/init/output/qualitative_reqbt-480-4-Asian.csv

@ -1,2 +0,0 @@
"EvidFile" "POI" "LR" "Dropout" "Race"
"reqbt-480" "reqbt-480-4" "4750673.48794637" "0.616" "Asian"

2
cjs-data/init/output/qualitative_reqbt-480-4-Black.csv

@ -1,2 +0,0 @@
"EvidFile" "POI" "LR" "Dropout" "Race"
"reqbt-480" "reqbt-480-4" "1515228.33120547" "0.618" "Black"

2
cjs-data/init/output/qualitative_reqbt-480-4-Hispanic.csv

@ -1,2 +0,0 @@
"EvidFile" "POI" "LR" "Dropout" "Race"
"reqbt-480" "reqbt-480-4" "1089835.95384817" "0.616" "Hispanic"

13
cjs/cjs/convert.py

@ -48,16 +48,16 @@ def convert(ctx):
def jfs2003id(ctx, input_dir, output_dir):
input_files = search_dir(input_dir, '*.csv')
output_dict = {}
race_list = []
for file_path in input_files:
with open(file_path, 'r') as f:
reader = csv.DictReader(f)
race_list = []
for row in reader:
tmp_dict = {}
name = row['Sample Info']
race = file_path.stem.split('-')[1][:-1]
tmp_dict['name'] = name
tmp_dict['race'] = race
for locus in jfs2003id_loci:
alleles = []
for i in range(2):
@ -65,7 +65,12 @@ def jfs2003id(ctx, input_dir, output_dir):
alleles.append(row[key])
tmp_dict[locus] = alleles
race_list.append(tmp_dict)
output_dict[file_path.stem.split('-')[1]] = race_list
output_dict = {
'jfs2003id': {
'type': 'comparison',
'data': race_list
}
}
write_json('%s/jfs2003id.json' % output_dir, output_dict)

32
cjs/cjs/core/batch.py

@ -6,6 +6,7 @@ import signal
import threading
import queue
import json
import time
import subprocess
from subprocess import TimeoutExpired
@ -64,6 +65,8 @@ class AutomateState:
# initialize a universal lock for all the workers
self.global_lock = threading.Lock()
self.threads = None
# a queue containing jobs
self.input_queue = queue.Queue(maxsize=queue_size)
# a queue containg results
@ -105,12 +108,13 @@ def generic_worker(func):
return result
return wrapper
def spawn_workers(target_func, State, num_workers, *args, **kwargs):
def spawn_workers(target_func, state, num_workers, *args, **kwargs):
# modified from https://docs.python.org/3.7/library/queue.html?highlight=queue#queue.Queue.join
thread_list = []
state.threads = thread_list
for i in range(num_workers):
t = threading.Thread(target=target_func,
args=(State, *args),
args=(state, *args),
kwargs={**kwargs})
t.start() # start the worker thread
thread_list.append(t) # store thread object
@ -131,15 +135,17 @@ def spawn_foreman(state, interface, database, scan_dir):
hault = True
signal.signal(signal.SIGINT, signal_handler)
pseudo_sleep = 0
none_count = 0
while True:
# get and add job queue
if not state.input_queue.full() and not hault:
job = interface.next_job(database)
if not state.input_queue.full():
if not hault:
job = interface.next_job(database)
if job == None:
none_count += 1
print('Foreman: Received dismissal signal')
else:
print(hault)
print('Foreman: Received %s and %s' % (job['evidence']['name'], job['comparison']['name']))
state.input_queue.put(job)
@ -180,5 +186,19 @@ def spawn_foreman(state, interface, database, scan_dir):
except queue.Empty as e:
pass
dead_threads = 0
for t in state.threads:
if not t.isAlive():
dead_threads += 1
if dead_threads == state.input_queue.maxsize:
hault = True
if pseudo_sleep == 0:
print('Foreman: Exiting program. Please wait 30 seconds for all I/O to complete...')
time.sleep(1)
pseudo_sleep += 1
if pseudo_sleep == 30:
sys.exit(0)
# stop watching the output directory
obs.stop()

25907
cjs/data/jfs2003id.json
File diff suppressed because it is too large
View File

6
cjs/data/mailer.json

@ -1,6 +0,0 @@
{
"server": "smtp.gmail.com",
"port": 587,
"login": "cucjsoftware@gmail.com",
"password": "@GreatGoat7"
}
Loading…
Cancel
Save