Awk Fixed-Length File Data Masking (Depersonalise)
#!/bin/bash
##################################################
# Name    : awkMask.sh
# Purpose : Data Mask Depersonalize Fixed Length Data File.
# Usage   :
#   awkMask.sh inputFile.dat > outputFile.dat
# Change History:
##################################################

##################################################
# Name      : f_awk
# Purpose   : Mask (depersonalise) the sensitive columns of a fixed-length
#             data file and write the result to stdout.
# Arguments : $1 - input file (optional; stdin is read when absent or empty)
# Outputs   : the masked records on stdout
# Returns   : the exit status of awk
#
# Record layout (1-based start column, width):
#   1)   1,  22  kept as is
#   2)  23,  20  "First Field"  -> "0" + 11 random digits + 8 spaces
#   3)  43,  20  "Second Field" -> "0" + 11 random digits + 8 spaces
#   4)  63,  84  kept as is
#   5) 147,  50  "Third Field"  -> "D" + 11 random digits + 38 spaces
#   6) 197, 138  kept as is
#   7) 335,  20  "Fourth Field" -> "0" + 11 random digits + 8 spaces
#   8) 355, ...  kept as is (everything up to the end of the record)
#
# A sensitive field whose first four characters are blank (or missing) is
# treated as "not supplied" and is written back as spaces of the same width.
# The first line (header) and any later line starting with "TRA" (trailing
# footer) are copied unchanged.
#
# Columns are cut with substr() rather than FIELDWIDTHS, so any POSIX awk can
# run this, and a record longer than the nominal layout is not truncated.
##################################################
function f_awk {
  # srand() on its own seeds from the clock, so two files masked within the
  # same second would receive identical "random" values. Mix in a
  # per-call salt from the shell.
  local l_salt=$(( (RANDOM << 15) | RANDOM ))

  # ${1:+"$1"} passes the file name as one word (spaces are safe) and passes
  # nothing at all when no name was given, so awk then reads stdin.
  awk -v salt="$l_salt" '
    # Return a string of n random decimal digits. Digits are drawn one at a
    # time so the result does not rely on the awk implementation being able
    # to format integers wider than 32 bits.
    # NOTE: rand() is a plain pseudo-random generator, not a cryptographic one.
    function f_digits(n,    l_out) {
      l_out = ""
      while (length(l_out) < n)
        l_out = l_out int(10 * rand())
      return l_out
    }

    # Return the replacement text for one sensitive field: spaces when the
    # field is blank, otherwise prefix + 11 random digits, both padded to
    # the field width.
    function f_mask(field, prefix, width) {
      if (substr(field, 1, 4) ~ /^ *$/)
        return sprintf("%-" width "s", "")
      return sprintf("%-" width "s", prefix f_digits(11))
    }

    BEGIN {
      srand()                  # seed from the clock ...
      srand(srand() + salt)    # ... then re-seed with clock + salt
    }

    # Header line.
    NR == 1 { print; next }

    # TRAiling footer line.
    substr($0, 1, 3) == "TRA" { print; next }

    # Data record. Every value is passed through "%s" so that a "%" inside
    # the data can never be interpreted as a printf format directive.
    {
      printf("%s%s%s%s%s%s%s%s\n",
             substr($0, 1, 22),
             f_mask(substr($0, 23, 20), "0", 20),
             f_mask(substr($0, 43, 20), "0", 20),
             substr($0, 63, 84),
             f_mask(substr($0, 147, 50), "D", 50),
             substr($0, 197, 138),
             f_mask(substr($0, 335, 20), "0", 20),
             substr($0, 355))
    }
  ' ${1:+"$1"}
}

##################################################
# Run only when a file name was supplied; otherwise report the usage on
# stderr. No exit here, so the file can still be extended with further
# definitions below this point.
if [[ $# -ge 1 ]]; then
  f_awk "$1"
else
  printf 'Usage: %s inputFile.dat > outputFile.dat\n' "${0##*/}" >&2
fi
##################################################
# (end of file).
Here are a couple of extra functions, in case you want to process every file in a directory in a loop or generate a hash check file:
##################################################
# Name    : f_hashctr
# Purpose : Write a SHA-1 control file (<name>.CTR, "sha1sum --tag" format)
#           next to the masked output file.
# Globals : g_file      (read) - file name, without any directory part
#           g_outputDir (read) - directory holding the output file; when it
#                                is unset or empty the historical hard-coded
#                                path /myOutputDir is used instead
# Returns : 0 on success; 1 when the directory cannot be entered or the hash
#           cannot be written (any partial .CTR file is removed).
##################################################
function f_hashctr {
  local l_dir=${g_outputDir:-/myOutputDir}
  local l_ctr=${g_file%.DAT}.CTR

  # The subshell keeps the cd from leaking into the caller. Hashing by the
  # bare file name keeps the directory out of the tag line in the .CTR file.
  if ! (
    cd -- "$l_dir" || exit 1
    sha1sum --tag -- "$g_file" > "$l_ctr"
  ); then
    printf 'f_hashctr: cannot create %s/%s\n' "$l_dir" "$l_ctr" >&2
    rm -f -- "$l_dir/$l_ctr"
    return 1
  fi
}

##################################################
# main
# Mask every *.DAT file found directly in g_inputDir into g_outputDir, write
# a .CTR hash file beside each output, and only then remove the input file
# and its old .CTR file.
g_inputDir=/my/input/directory/path
g_outputDir=/my/output/directory/path

if [[ ! -d "$g_inputDir" || ! -d "$g_outputDir" ]]; then
  printf '%s: input or output directory is missing (%s, %s)\n' \
    "${0##*/}" "$g_inputDir" "$g_outputDir" >&2
else
  # NUL-delimited names read on fd 3: file names containing spaces survive,
  # and nothing inside the loop can accidentally consume the list via stdin.
  while IFS= read -r -d '' -u 3 g_file; do
    g_inFile=$g_inputDir/$g_file
    g_outFile=$g_outputDir/$g_file

    # Never delete the input unless masking itself succeeded.
    if ! f_awk "$g_inFile" > "$g_outFile"; then
      printf '%s: masking failed for %s; input kept\n' "${0##*/}" "$g_inFile" >&2
      continue
    fi

    if [[ -s "$g_outFile" ]]; then
      if f_hashctr; then
        rm -- "$g_inFile"
        rm -f -- "$g_inputDir/${g_file%.DAT}.CTR"
      else
        printf '%s: hash file failed for %s; input kept\n' "${0##*/}" "$g_outFile" >&2
      fi
    fi
  done 3< <(find "$g_inputDir" -maxdepth 1 -name '*.DAT' -type f -printf '%f\0')
fi
##################################################
# (end of file).
Leave a Reply