Browse Source

way to find missing results

cjs3
Stephen Lorenz 3 years ago
parent
commit
49bb6aa584
  1. 1349012
      orphans/checklist.csv
  2. 33
      orphans/cross_join.py
  3. 11
      orphans/debug.py
  4. BIN
      orphans/default.db
  5. 4962
      orphans/euroformix-default-known.csv
  6. 1341986
      orphans/euroformix-default-non.csv
  7. 5012
      orphans/euroformix-full_run-known.csv
  8. 1341792
      orphans/euroformix-full_run-non.csv
  9. 30
      orphans/export.py
  10. 56
      orphans/find_missing.py
  11. 13306
      orphans/jfs2003id.json
  12. 1
      orphans/missing_default.json
  13. 1
      orphans/missing_fst.json
  14. 44670
      orphans/reqbt-fps.json

1349012
orphans/checklist.csv
File diff suppressed because it is too large
View File

33
orphans/cross_join.py

@ -0,0 +1,33 @@
#!/usr/bin/env python3
import csv
import json
def read_json(filename):
    """Load and return the parsed contents of a JSON file.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The decoded JSON document (dict, list, str, number, bool or None).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON interchange is UTF-8; do not depend on the platform's default
    # locale encoding when decoding the file.
    with open(filename, 'r', encoding='utf-8') as fh:
        return json.load(fh)
# Build checklist.csv: one row [evidence name, comparison name, race] for
# every run that is expected to exist.
known_contributors = read_json('reqbt-fps.json')
non_contributors = read_json('jfs2003id.json')

races = ['asian', 'black', 'caucasian', 'hispanic']

run_list = []
evidence_names = []

# Part 1: every evidence item against each of its own listed comparisons,
# once per race.
for evidence in known_contributors['reqbt-fps']['data']:
    evidence_names.append(evidence['name'])
    for comparison in evidence['comparisons']:
        for race in races:
            run_list.append([evidence['name'], comparison['name'], race])

# Part 2: every evidence item against every entry of the second data set,
# once per race. The names are collected once instead of re-reading the
# nested structure for each evidence item.
non_contributor_names = [
    comparison['name'] for comparison in non_contributors['jfs2003id']['data']
]
for evidence in evidence_names:
    for name in non_contributor_names:
        for race in races:
            run_list.append([evidence, name, race])

# newline='' is required by the csv module; without it the writer's own
# '\r\n' terminators are translated again and produce blank rows on Windows.
with open('checklist.csv', 'w', newline='') as fh:
    writer = csv.writer(fh)
    writer.writerows(run_list)

11
orphans/debug.py

@ -0,0 +1,11 @@
# Scratch check of set difference on comma-joined row strings.
test_a = ['a,b,c', 'd,e,f', 'g,h,i']
test_b = ['a,b,c']

# Show the entries of test_a that do not occur in test_b.
print(set(test_a).difference(test_b))

BIN
orphans/default.db

4962
orphans/euroformix-default-known.csv
File diff suppressed because it is too large
View File

1341986
orphans/euroformix-default-non.csv
File diff suppressed because it is too large
View File

5012
orphans/euroformix-full_run-known.csv
File diff suppressed because it is too large
View File

1341792
orphans/euroformix-full_run-non.csv
File diff suppressed because it is too large
View File

30
orphans/export.py

@ -0,0 +1,30 @@
#!/usr/bin/env python3
import sqlite3
import csv

# Dump the Result table of default.db into two CSV files, split by whether
# column 2 starts with 'reqbt-'.
conn = sqlite3.connect('default.db')
try:
    curs = conn.cursor()
    results = curs.execute('SELECT * FROM Result').fetchall()
finally:
    # Release the database file even if the query fails.
    conn.close()

known = []
non = []
for r in results:
    # Output column order is 1, 2, 4, 3 of the table row.
    # NOTE(review): presumably evidence, comparison, LR, race - confirm
    # against the Result schema, which is not visible here.
    row = [r[1], r[2], r[4], r[3]]
    if r[2].startswith('reqbt-'):
        known.append(row)
    else:
        non.append(row)

# newline='' is required by the csv module; without it blank rows appear
# between records on Windows.
for path, rows in (
    ('euroformix-default-known.csv', known),
    ('euroformix-default-non.csv', non),
):
    with open(path, 'w', newline='') as f:
        csv.writer(f).writerows(rows)

56
orphans/find_missing.py

@ -0,0 +1,56 @@
#!/usr/bin/env python3
import csv
import json
from pprint import pprint
def read_csv(filename):
    """Read a CSV file and return all of its rows.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list of rows, each row being a list of string fields.

    Raises:
        OSError: If the file cannot be opened.
        csv.Error: If the file cannot be parsed as CSV.
    """
    # newline='' lets the csv module do its own line handling, so newlines
    # embedded in quoted fields are preserved instead of being translated.
    with open(filename, 'r', newline='') as fh:
        return list(csv.reader(fh))
# Expected runs, one row per run.
check_file = 'checklist.csv'
check_data = read_csv(check_file)

# Results that were actually produced, stored in two separate files.
known_file = 'euroformix-full_run-known.csv'
known_data = read_csv(known_file)
non_file = 'euroformix-full_run-non.csv'
non_data = read_csv(non_file)
def drop_lr(data, index=2):
    """Remove one column from every row of ``data``, in place.

    Args:
        data: List of rows; each row is a mutable list.
        index: Position of the column to delete from each row. Defaults
            to 2, the column this script drops from the result files.

    Returns:
        None. The rows of ``data`` are modified in place.

    Raises:
        IndexError: If a row has no element at ``index``.
    """
    for row in data:
        del row[index]
def flatten(data, sep=';'):
    """Join the fields of each row into a single string.

    Turning rows into strings makes them hashable, so they can be compared
    with set operations.

    Args:
        data: Iterable of rows; each row is an iterable of strings.
        sep: Separator placed between fields. Defaults to ';'.

    Returns:
        A new list with one joined string per row, in the original order.
    """
    return [sep.join(row) for row in data]
# Drop column 2 from the result rows so that only the remaining columns
# are compared with the checklist rows.
drop_lr(known_data)
drop_lr(non_data)

# Collapse every row into a single ';'-joined key so rows can be put in sets.
check_data = flatten(check_data)
known_data = flatten(known_data)
non_data = flatten(non_data)
known_data.extend(non_data)

# Checklist entries with no matching result row in either result file.
missing = set(check_data).difference(known_data)

# Iterate in sorted order: plain set iteration order for strings changes
# between interpreter runs, which made the output file non-reproducible.
whitelist = []
for key in sorted(missing):
    fields = key.split(';')
    # Column 2 of the key is left out of the exported entries.
    del fields[2]
    whitelist.append(fields)

pprint(whitelist)
print(len(whitelist))

with open('missing_fst.json', 'w') as fh:
    json.dump(whitelist, fh)

13306
orphans/jfs2003id.json
File diff suppressed because it is too large
View File

1
orphans/missing_default.json
File diff suppressed because it is too large
View File

1
orphans/missing_fst.json
File diff suppressed because it is too large
View File

44670
orphans/reqbt-fps.json
File diff suppressed because it is too large
View File

Loading…
Cancel
Save