Awk Fixed Length File Data Masking Depersonalise

#!/bin/bash
##################################################
# Name : awkMask.sh
# Purpose : Data Mask Depersonalize Fixed Length Data File.
# Usage :
#    awkMask.sh inputFile.dat > outputFile.dat
# Change History:
##################################################
# f_awk : mask the personal fields of one fixed-length data file.
# Arguments: $1 - input file path (standard input is read when omitted)
# Outputs:   the masked file on stdout
# Record layout (1-based columns):
#     1- 22 kept          23- 42 masked "0"    43- 62 masked "0"
#    63-146 kept         147-196 masked "D"   197-334 kept
#   335-354 masked "0"   355-1353 kept (anything beyond is dropped)
# The first line (header) and every line starting with "TRA" (trailer)
# are copied through unchanged.
function f_awk {
awk '
# Masked replacement for one field: tag followed by 11 random digits,
# space-padded to width. A field whose first four characters are blank
# or missing is treated as empty and becomes width spaces.
function mask(field, tag, width,    head) {
  head = substr(field, 1, 4)
  if (head == "" || head == "    ")
    return sprintf("%-" width "s", "")
  # The 11 digits are drawn as 6 + 5 so each number stays far below 2^31
  # and formats correctly even where awk pushes %d through a 32-bit int.
  return sprintf("%-" width "s", tag sprintf("%06d%05d", int(1e6 * rand()), int(1e5 * rand())))
}
BEGIN {
  srand()  # to generate different random numbers each run
}
# Header (first line) and trailer lines pass through untouched.
NR == 1 || substr($0, 1, 3) == "TRA" {
  print
  next
}
{
  # Columns are cut with substr() rather than the gawk-only FIELDWIDTHS,
  # so the layout also holds under mawk, BusyBox awk and BSD awk.
  # Kept columns go through "%s": used as the printf format itself, any
  # "%" in the data would be read as a conversion specification.
  printf "%s%s%s%s%s%s%s%s\n",
    substr($0, 1, 22),
    mask(substr($0, 23, 20), "0", 20),
    mask(substr($0, 43, 20), "0", 20),
    substr($0, 63, 84),
    mask(substr($0, 147, 50), "D", 50),
    substr($0, 197, 138),
    mask(substr($0, 335, 20), "0", 20),
    substr($0, 355, 999)
}
' ${1:+"$1"}  # quoted when given; expands to nothing (stdin) when omitted
}
##################################################
# Quote the path so names containing spaces or glob characters reach f_awk intact.
f_awk "$1"
##################################################
# (end of file).

And here are a couple of extra functions if you want to process all files in a directory in a loop, or generate a hash check file:

##################################################
# f_hashctr : write a SHA-1 check file next to a masked output file.
# Globals:   g_file      (read) - data file name, relative to the output directory
#            g_outputDir (read) - output directory; /myOutputDir is used when
#                                 it is unset or empty
# Outputs:   "<g_file without .DAT>.CTR" in the output directory, holding the
#            "sha1sum --tag" line for g_file
# Returns:   non-zero when the directory cannot be entered or sha1sum fails
function f_hashctr {
  # The subshell confines the cd, so the working directory of the caller is
  # never changed. Hashing from inside the directory keeps the bare file name
  # (no path) in the check file.
  (
    cd -- "${g_outputDir:-/myOutputDir}" || exit
    sha1sum --tag -- "$g_file" > "${g_file%.DAT}.CTR"
  )
}
##################################################
# main
# Mask every *.DAT file found directly in g_inputDir into g_outputDir, write
# its hash check file, then remove the original and its old .CTR file.
g_inputDir=/my/input/directory/path
g_outputDir=/my/output/directory/path
# File names arrive NUL-delimited on descriptor 3, so names containing
# spaces or glob characters survive intact and standard input stays free.
# g_file is left global because f_hashctr reads it.
while IFS= read -r -d '' -u 3 g_file
do
  # The input is deleted only after masking succeeded, produced a non-empty
  # file, and the check file was written; otherwise it is kept for a retry.
  if f_awk "$g_inputDir/$g_file" > "$g_outputDir/$g_file" &&
     [[ -s "$g_outputDir/$g_file" ]] &&
     f_hashctr
  then
    rm -- "$g_inputDir/$g_file"
    rm -f -- "$g_inputDir/${g_file%.DAT}.CTR"
  fi
done 3< <(find "$g_inputDir" -maxdepth 1 -name '*.DAT' -type f -printf '%f\0')
##################################################
# (end of file).

May 29, 2023

Leave a Reply

Your email address will not be published. Required fields are marked *