#!/bin/bash

# Usage: bash -x split_by_awk.sh
# This script splits all_more_fields.csv into three files:
#   injector1.csv, injector2.csv and injector3.csv, each containing the set of
#   packets sent by a different GFW process.

# ts;qname;type;answer;cname;dns_ttl;dns_flags;dns_aa;ip_ttl;ip_id;ip_flags;df;tos
# $8 is DNS.AA; $12 is IP.DF

# We freeze the dataset by time: only rows whose timestamp lies strictly
# between 1568692823.487619 and 1590094531.2941742 are kept (rows exactly on
# either bound are dropped).

# mawk may be faster than gawk

#######################################
# Split a semicolon-separated packet dump into one file per injector.
#
# The first input line is treated as the column header and is copied to the
# top of each output file. Columns, as named by that header:
#   ts;qname;type;answer;cname;dns_ttl;dns_flags;dns_aa;ip_ttl;ip_id;ip_flags;df;tos
# A row is kept only when ts ($1) lies strictly between the two freeze
# timestamps; kept rows are routed by dns_aa ($8), df ($12) and tos ($13):
#   dns_aa==1, df==0                     -> injector1.csv
#   dns_aa==0, df==1                     -> injector2.csv
#   dns_aa==0, df==0, tos=="0x00000000"  -> injector3.csv
# Any other in-window row is reported on stdout with its input line number.
#
# Arguments: $1 - input CSV (optional; defaults to all_more_fields.csv)
# Outputs:   injector1.csv, injector2.csv, injector3.csv in the current
#            directory; unclassifiable rows on stdout
# Returns:   the exit status of awk
#######################################
split_by_injector() {
  local input=${1:-all_more_fields.csv}

  # mawk is preferred because it may be faster, but the program below is
  # plain awk, so fall back to whatever awk is installed.
  local awk_bin=mawk
  command -v mawk >/dev/null 2>&1 || awk_bin=awk

  "$awk_bin" '
BEGIN {
  FS = ";"
  OFS = ";"
  # Freeze window; both bounds are exclusive.
  window_start = 1568692823.487619
  window_end = 1590094531.2941742
}

# Header row: seed every output file, then stop. Without the explicit next the
# header would fall through to the rules below and only be dropped because
# "ts" happens to compare outside the window as a string.
NR == 1 {
  print $0 > "injector1.csv"
  print $0 > "injector2.csv"
  print $0 > "injector3.csv"
  next
}

# Drop everything outside the freeze window.
$1 <= window_start || $1 >= window_end { next }

$8 == 1 && $12 == 0 { print $0 > "injector1.csv"; next }
$8 == 0 && $12 == 1 { print $0 > "injector2.csv"; next }
$8 == 0 && $12 == 0 && $13 == "0x00000000" { print $0 > "injector3.csv"; next }

# No known fingerprint matched: report the row instead of silently losing it.
# The two print arguments are joined by OFS, so the line number follows a ";".
{
  print "Should never be here: ", NR
  print $0
}' "$input"
}

split_by_injector "$@"
