Awk Fixed-Length File Data Masking (Depersonalise)

1#!/bin/bash
2##################################################
3# Name : awkMask.sh
4# Purpose : Data Mask Depersonalize Fixed Length Data File.
5# Usage :
6#    awkMask.sh inputFile.dat > outputFile.dat
7# Change History:
8##################################################
##################################################
# f_awk - depersonalise one fixed-length data file.
# Arguments: $1 - path of the input file (standard input is read when
#                 no path is given).
# Outputs:   the masked file on stdout.
# Notes:     relies on FIELDWIDTHS, which is a gawk extension.
#
# Record layout (field widths): 22 20 20 84 50 138 20 999
#   - record 1 (header) and every record starting with "TRA" (trailer)
#     are copied unchanged;
#   - fields 2, 3 and 7 become "0" + 11 random digits, padded to 20;
#   - field 5 becomes "D" + 11 random digits, padded to 50;
#   - a field whose first four characters are blank or empty is written
#     as spaces only;
#   - fields 1, 4, 6 and 8 are copied through as data.
##################################################
f_awk() {
  # ${1:+"$1"} hands the path to awk as a single word (safe for names
  # with spaces) and passes nothing at all when no path was supplied.
  awk '
# Build the replacement for one personal field, padded to "width" chars.
# A blank field stays blank and does not consume a random number.
function masked(value, prefix, width,    lead) {
  lead = substr(value, 1, 4)
  if (lead == "    " || lead == "")
    return sprintf("%-" width "s", "")
  return sprintf("%-" width "s", prefix sprintf("%011i", int(1e11 * rand())))
}

BEGIN {
  FIELDWIDTHS = "22 20 20 84 50 138 20 999"
  # Seeded from the time of day: different digits on each run, but two
  # runs started within the same second share the same sequence.
  srand()
}

# Header line.
NR == 1 { print; next }

# Trailing footer line.
substr($0, 1, 3) == "TRA" { print; next }

# Detail line. Pass-through fields are concatenated as data and never
# used as a printf format, so a "%" inside them is preserved verbatim.
{
  rec = $1
  rec = rec masked($2, "0", 20)   # 2) "First Field"
  rec = rec masked($3, "0", 20)   # 3) "Second Field"
  rec = rec $4
  rec = rec masked($5, "D", 50)   # 5) "Third Field"
  rec = rec $6
  rec = rec masked($7, "0", 20)   # 7) "Fourth Field"
  print rec $8
}
' ${1:+"$1"}
}
63##################################################
# Mask the file named by the first script argument; quoted so a path
# containing spaces or glob characters reaches f_awk as one word.
f_awk "$1"
65##################################################
66# (end of file).

Here are a couple of extra functions in case you want to process every file in a directory in a loop, or generate a hash check file:

1##################################################
# f_hashctr - write a SHA-1 control file next to a masked output file.
# Globals:   g_file      (read) - file name, e.g. NAME.DAT
#            g_outputDir (read) - directory holding the file; falls back
#                                 to /myOutputDir when unset or empty
# Outputs:   creates NAME.CTR in the output directory, holding the
#            "sha1sum --tag" line for the file.
# Returns:   non-zero when the directory cannot be entered or the hash
#            cannot be computed.
f_hashctr() {
  local out_dir=${g_outputDir:-/myOutputDir}
  # The subshell keeps the directory change local to the hashing step, so
  # the caller's working directory is untouched even when something fails.
  # Hashing from inside the directory keeps the recorded name path-free.
  (
    : "${g_file:?g_file must be set}"
    cd -- "$out_dir" || exit 1
    sha1sum --tag -- "$g_file" > "${g_file%.DAT}.CTR"
  )
}
7##################################################
# main
# Mask every regular *.DAT file found directly inside g_inputDir, writing
# the result under the same name into g_outputDir. An input file (and its
# old .CTR control file) is removed only after masking succeeded, produced
# non-empty output, and the new control file was written.
g_inputDir=/my/input/directory/path
g_outputDir=/my/output/directory/path
# File names arrive NUL-delimited so spaces or glob characters in a name
# cannot split or expand it. g_file stays global: f_hashctr reads it.
while IFS= read -r -d '' g_file; do
  if f_awk "$g_inputDir/$g_file" > "$g_outputDir/$g_file" \
    && [[ -s "$g_outputDir/$g_file" ]] \
    && f_hashctr; then
    rm -- "$g_inputDir/$g_file"
    rm -f -- "$g_inputDir/${g_file%.DAT}.CTR"
  fi
done < <(find "$g_inputDir" -maxdepth 1 -name '*.DAT' -type f -printf '%f\0')
21##################################################
22# (end of file).
May 29, 2023

Leave a Reply

Your email address will not be published. Required fields are marked *