Awk: Fixed-Length File Data Masking (Depersonalise)
1 | #!/bin/bash |
2 | ################################################## |
3 | # Name : awkMask.sh |
4 | # Purpose : Data Mask Depersonalize Fixed Length Data File. |
5 | # Usage : |
6 | # awkMask.sh inputFile.dat > outputFile.dat |
7 | # Change History: |
8 | ################################################## |
##################################################
# f_awk: mask the personal-data columns of a fixed-length data file.
#
# Arguments: $@ - input file(s); with no argument awk reads standard input.
# Outputs:   the masked records on standard output.
#
# Record layout (1-based character columns):
#     1- 22  copied unchanged
#    23- 42  masked: "0" + 11 random digits + 8 spaces
#    43- 62  masked: "0" + 11 random digits + 8 spaces
#    63-146  copied unchanged
#   147-196  masked: "D" + 11 random digits + 38 spaces
#   197-334  copied unchanged
#   335-354  masked: "0" + 11 random digits + 8 spaces
#   355-end  copied unchanged
# A masked column whose first four characters are blank (or missing) is
# written as all spaces.  The first line of each file (header) and every
# later line starting with "TRA" (trailer) are copied unchanged.
##################################################
function f_awk {
  # srand() without an argument seeds from the clock, so files masked within
  # the same second would receive identical "random" values; hand awk a seed
  # drawn from bash instead.
  awk -v seed="$(( RANDOM * 32768 + RANDOM ))" '
    # Return the replacement for one masked column: all spaces when the first
    # four characters of the input are blank or absent, otherwise the prefix
    # followed by 11 random digits, left-justified in "width" columns.
    function mask(field, prefix, width) {
      if (substr(field, 1, 4) ~ /^ *$/)
        return sprintf("%-" width "s", "")
      # Two small chunks (5 + 6 digits) avoid relying on %d being able to
      # format an 11-digit value, which some awk implementations cannot do.
      return sprintf("%-" width "s",
        sprintf("%s%05d%06d", prefix, int(1e5 * rand()), int(1e6 * rand())))
    }

    BEGIN { srand(seed) }

    # Header and trailer lines pass through untouched.
    FNR == 1 || substr($0, 1, 3) == "TRA" { print; next }

    {
      # Columns are cut with substr() rather than FIELDWIDTHS so the program
      # does not depend on gawk, and the tail is not limited to 999 characters.
      # Data is always passed as an argument to "%s", never as the format
      # itself, so a literal "%" in the file is copied verbatim.
      printf("%s%s%s%s%s%s%s%s\n",
        substr($0, 1, 22),
        mask(substr($0, 23, 20), "0", 20),
        mask(substr($0, 43, 20), "0", 20),
        substr($0, 63, 84),
        mask(substr($0, 147, 50), "D", 50),
        substr($0, 197, 138),
        mask(substr($0, 335, 20), "0", 20),
        substr($0, 355))
    }
  ' "$@"
}
63 | ################################################## |
# Quote the script arguments so the shell does not word-split or glob them
# before f_awk receives them.
f_awk "$@"
65 | ################################################## |
66 | # (end of file). |
Here are a couple of extra pieces in case you want to process every file in a directory in a loop, or generate a hash check file:
1 | ################################################## |
##################################################
# f_hashctr: write a SHA-1 check file next to a data file.
#
# Runs `sha1sum --tag` on the data file from inside its directory, so the
# check file records the bare file name, and stores the result in a file
# with the same name but a .CTR suffix in place of .DAT.
#
# Globals:   g_file      (read) default data file name
#            g_outputDir (read) default directory; /myOutputDir when unset
# Arguments: $1 - optional data file name (overrides g_file)
#            $2 - optional directory      (overrides g_outputDir)
# Returns:   0 on success; non-zero when no file name is known, the
#            directory cannot be entered, the data file is missing, or
#            sha1sum fails.
##################################################
function f_hashctr {
  local file=${1:-${g_file:-}}
  local dir=${2:-${g_outputDir:-/myOutputDir}}

  if [[ -z "$file" ]]; then
    printf 'f_hashctr: no file name given and g_file is not set\n' >&2
    return 1
  fi

  # The subshell confines the directory change, so the caller's working
  # directory is untouched and a failed cd can never cause the check file
  # to be written somewhere else.
  (
    cd -- "$dir" > /dev/null || exit 1
    if [[ ! -f "$file" ]]; then
      printf 'f_hashctr: %s/%s is not a regular file\n' "$dir" "$file" >&2
      exit 1
    fi
    sha1sum --tag -- "$file" > "${file%.DAT}.CTR"
  )
}
7 | ################################################## |
8 | # main |
# Mask every *.DAT file found directly inside g_inputDir, writing the result
# under the same name into g_outputDir.  An input file (and its .CTR
# companion) is removed only after masking succeeded, the output is
# non-empty, and the check file was written; otherwise the input is kept.
# No space may follow "=" in an assignment: with one, the shell assigns an
# empty value and tries to execute the path as a command.
g_inputDir=/my/input/directory/path
g_outputDir=/my/output/directory/path

# File names are read NUL-delimited on descriptor 3, so names containing
# whitespace stay intact and commands in the loop body keep their own stdin.
# g_file stays a global because f_hashctr reads it.
while IFS= read -r -d '' -u 3 g_file; do
  if f_awk "$g_inputDir/$g_file" > "$g_outputDir/$g_file" &&
     [[ -s "$g_outputDir/$g_file" ]] &&
     f_hashctr; then
    rm -- "$g_inputDir/$g_file"
    rm -f -- "$g_inputDir/${g_file%.DAT}.CTR"
  else
    printf 'masking failed for %s; input file kept\n' "$g_inputDir/$g_file" >&2
  fi
done 3< <(find "$g_inputDir" -maxdepth 1 -name '*.DAT' -type f -printf '%f\0')
21 | ################################################## |
22 | # (end of file). |
Leave a Reply