General Introduction

Dark corners are basically fractal—no matter how much you illuminate, there’s always a smaller but darker one. — Brian Kernighan

Michael Brennan
Author of mawk
March 2001

Michael Brennan
Author of mawk
October 2014

$ awk 1 /dev/null
error→ awk: syntax error near line 1
error→ awk: bailing out near line 1

$ echo hi on stdout
-| hi on stdout
$ echo hello on stderr 1>&2
error→ hello on stderr

pattern { action }
pattern { action }
…

awk 'program' input-file1 input-file2 …

awk -f program-file input-file1 input-file2 …

awk 'program' input-file1 input-file2 …

awk 'program'

$ awk 'BEGIN { print "Don\47t Panic!" }'
-| Don't Panic!

$ awk '{ print }'
Now is the time for all good men
-| Now is the time for all good men
to come to the aid of their country.
-| to come to the aid of their country.
Four score and seven years ago, ...
-| Four score and seven years ago, ...
What, me worry?
-| What, me worry?
Ctrl-d

awk -f source-file input-file1 input-file2 …

BEGIN { print "Don't Panic!" }

awk -f advice

awk 'BEGIN { print "Don\47t Panic!" }'

#! /bin/awk -f
BEGIN { print "Don't Panic!" }

$ chmod +x advice
$ ./advice
-| Don't Panic!

# This program prints a nice, friendly message.  It helps
# keep novice users from being afraid of the computer.
BEGIN    { print "Don't Panic!" }

$ awk 'BEGIN { print "hello" } # let's be cute'
>

$ awk '{ print "hello" } # let's be cute'
> '
error→ awk: can't open file be
error→  source line number 1

awk 'program text' input-file1 input-file2 …

$ echo ""

awk 'BEGIN { print "Don\47t Panic!" }'

$ awk "BEGIN { print \"Don't Panic!\" }"
-| Don't Panic!

awk -F "" 'program' files # correct

awk -F"" 'program' files  # wrong!

$ awk 'BEGIN { print "Here is a single quote <'"'"'>" }'
-| Here is a single quote <'>

$ awk 'BEGIN { print "Here is a single quote <'\''>" }'
-| Here is a single quote <'>

$ awk "BEGIN { print \"Here is a single quote <'>\" }"
-| Here is a single quote <'>

$ awk 'BEGIN { print "Here is a single quote <\47>" }'
-| Here is a single quote <'>
$ awk 'BEGIN { print "Here is a double quote <\42>" }'
-| Here is a double quote <">

$ awk -v sq="'" 'BEGIN { print "Here is a single quote <" sq ">" }'
-| Here is a single quote <'>

{ print "\"" $0 "\"" }

gawk "{ print \"\042\" $0 \"\042\" }" file

gawk "{ print \"\\\"\" $0 \"\\\"\" }" file

Amelia       555-5553     amelia.zodiacusque@gmail.com    F
Anthony      555-3412     anthony.asserturo@hotmail.com   A
Becky        555-7685     becky.algebrarum@gmail.com      A
Bill         555-1675     bill.drowning@hotmail.com       A
Broderick    555-0542     broderick.aliquotiens@yahoo.com R
Camilla      555-2912     camilla.infusarum@skynet.be     R
Fabius       555-1234     fabius.undevicesimus@ucb.edu    F
Julie        555-6699     julie.perscrutabor@skeeve.com   F
Martin       555-6480     martin.codicibus@hotmail.com    A
Samuel       555-3430     samuel.lanceolis@shu.edu        A
Jean-Paul    555-2127     jeanpaul.campanorum@nyu.edu     R

Jan  13  25  15 115
Feb  15  32  24 226
Mar  15  24  34 228
Apr  31  52  63 420
May  16  34  29 208
Jun  31  42  75 492
Jul  24  34  67 436
Aug  15  34  47 316
Sep  13  55  37 277
Oct  29  54  68 525
Nov  20  87  82 577
Dec  17  35  61 401
Jan  21  36  64 620
Feb  26  58  80 652
Mar  24  75  70 495
Apr  21  70  74 514

awk '/li/ { print $0 }' mail-list

$ awk '/li/ { print $0 }' mail-list
-| Amelia       555-5553     amelia.zodiacusque@gmail.com    F
-| Broderick    555-0542     broderick.aliquotiens@yahoo.com R
-| Julie        555-6699     julie.perscrutabor@skeeve.com   F
-| Samuel       555-3430     samuel.lanceolis@shu.edu        A

-rw-r--r--  1 arnold   user   1933 Nov  7 13:05 Makefile
-rw-r--r--  1 arnold   user  10809 Nov  7 13:03 awk.h
-rw-r--r--  1 arnold   user    983 Apr 13 12:14 awk.tab.h
-rw-r--r--  1 arnold   user  31869 Jun 15 12:20 awkgram.y
-rw-r--r--  1 arnold   user  22414 Nov  7 13:03 awk1.c
-rw-r--r--  1 arnold   user  37455 Nov  7 13:03 awk2.c
-rw-r--r--  1 arnold   user  27511 Dec  9 13:07 awk3.c
-rw-r--r--  1 arnold   user   7989 Nov  7 13:03 awk4.c

awk 'length($0) > 80' data

awk '{ if (length($0) > max) max = length($0) }
     END { print max }' data

expand data | awk '{ if (x < length($0)) x = length($0) }
                   END { print "maximum line length is " x }'

awk 'NF > 0' data

awk 'BEGIN { for (i = 1; i <= 7; i++)
                 print int(101 * rand()) }'

ls -l files | awk '{ x += $5 }
                   END { print "total bytes: " x }'

ls -l files | awk '{ x += $5 }
   END { print "total K-bytes:", x / 1024 }'

awk -F: '{ print $1 }' /etc/passwd | sort

awk 'END { print NR }' data

awk 'NR % 2 == 0' data

/12/  { print $0 }
/21/  { print $0 }

$ awk '/12/ { print $0 }
>      /21/ { print $0 }' mail-list inventory-shipped
-| Anthony      555-3412     anthony.asserturo@hotmail.com   A
-| Camilla      555-2912     camilla.infusarum@skynet.be     R
-| Fabius       555-1234     fabius.undevicesimus@ucb.edu    F
-| Jean-Paul    555-2127     jeanpaul.campanorum@nyu.edu     R
-| Jean-Paul    555-2127     jeanpaul.campanorum@nyu.edu     R
-| Jan  21  36  64 620
-| Apr  21  70  74 514

ls -l | awk '$6 == "Nov" { sum += $5 }
             END { print sum }'

awk '/12/  { print $0 }
     /21/  { print $0 }' mail-list inventory-shipped

,    {    ?    :    ||    &&    do    else

awk '/This regular expression is too long, so continue it\
 on the next line/ { print $1 }'

% awk 'BEGIN { \
?   print \\
?       "hello, world" \
? }'
-| hello, world

$ awk 'BEGIN {
>   print \
>       "hello, world"
> }'
-| hello, world

$ gawk 'BEGIN { print "dont panic" # a friendly \
>                                    BEGIN rule
> }'
error→ gawk: cmd. line:2:                BEGIN rule
error→ gawk: cmd. line:2:                ^ syntax error

/12/ { print $0 } ; /21/ { print $0 }

awk [options] -f progfile [--] file …
awk [options] [--] 'program' file …

awk '' datafile1 datafile2

$ gawk -e 'BEGIN { a = 5 ;' -e 'print a }'
-| 5

#! /usr/local/bin/gawk -E
awk program here …

POSIXLY_CORRECT=true
export POSIXLY_CORRECT

setenv POSIXLY_CORRECT true

awk -f program.awk file1 count=1 file2

awk -f program.awk file1 ./count=1 file2

awk 'pass == 1  { pass 1 stuff }
     pass == 2  { pass 2 stuff }' pass=1 mydata pass=2 mydata

f=""
awk '{ print("hi") }' $f

$ f=""
$ awk 'BEGIN { print ARGC }' $f
-| 1
$ awk 'BEGIN { print ARGC }' "$f"
-| 2

some_command | awk -f myprog.awk file1 - file2

BEGIN {
    print "This is script test1."
}

@include "test1"
BEGIN {
    print "This is script test2."
}

$ gawk -f test2
-| This is script test1.
-| This is script test2.

@include "test2"
BEGIN {
    print "This is script test3."
}

$ gawk -f test3
-| This is script test1.
-| This is script test2.
-| This is script test3.

@include "../io_funcs"

@include "/usr/awklib/network"

$ gawk '@load "ordchr"; BEGIN {print chr(65)}'
-| A

$ gawk -lordchr 'BEGIN {print chr(65)}'
-| A

$ awk '/li/ { print $2 }' mail-list
-| 555-5553
-| 555-0542
-| 555-6699
-| 555-3430

exp ~ /regexp/

$ awk '$1 ~ /J/' inventory-shipped
-| Jan  13  25  15 115
-| Jun  31  42  75 492
-| Jul  24  34  67 436
-| Jan  21  36  64 620

awk '{ if ($1 ~ /J/) print }' inventory-shipped

exp !~ /regexp/

$ awk '$1 !~ /J/' inventory-shipped
-| Feb  15  32  24 226
-| Mar  15  24  34 228
-| Apr  31  52  63 420
-| May  16  34  29 208
…

$ awk 'BEGIN { print "He said \"hi!\" to her." }'
-| He said "hi!" to her.

if ("line1\nLINE 2" ~ /^L/) …

if ("line1\nLINE 2" ~ /1$/) …

$ awk '
> BEGIN {
>     x = "ABC_CBA"
>     gsub(/B/, "bb", x)
>     print x
> }'
-| AbbC_CbbA

$ awk '
> BEGIN {
>     x = "ABC"
>     gsub(//, "x", x)
>     print x
> }'
-| xAxBxCx

[d\]]

echo aaaabcd | awk '{ sub(/a+/, "<A>"); print }'

$ echo aaaabcd | awk '{ sub(/a+/, "<A>"); print }'
-| <A>bcd

BEGIN { digits_regexp = "[[:digit:]]+" }
$0 ~ digits_regexp    { print }

$ awk '$0 ~ "[ \t\n]"'
error→ awk: newline in character class [
error→ ]...
error→  source line number 1
error→  context is
error→        $0 ~ "[ >>>  \t\n]" <<<

$ awk '$0 ~ /[ \t\n]/'
here is a sample line
-| here is a sample line
Ctrl-d

tolower($1) ~ /foo/  { … }

x = "aB"
if (x ~ /ab/) …   # this test will fail
IGNORECASE = 1
if (x ~ /ab/) …   # now it will succeed

awk 'BEGIN { RS = "u" }
     { print $0 }' mail-list

$ awk 'BEGIN { RS = "u" }
>      { print $0 }' mail-list

-| Amelia       555-5553     amelia.zodiac
-| sq
-| e@gmail.com    F
-| Anthony      555-3412     anthony.assert
-| ro@hotmail.com   A
-| Becky        555-7685     becky.algebrar
-| m@gmail.com      A
-| Bill         555-1675     bill.drowning@hotmail.com       A
-| Broderick    555-0542     broderick.aliq
-| otiens@yahoo.com R
-| Camilla      555-2912     camilla.inf
-| sar
-| m@skynet.be     R
-| Fabi
-| s       555-1234     fabi
-| s.
-| ndevicesim
-| s@
-| cb.ed
-|     F
-| J
-| lie        555-6699     j
-| lie.perscr
-| tabor@skeeve.com   F
-| Martin       555-6480     martin.codicib
-| s@hotmail.com    A
-| Sam
-| el       555-3430     sam
-| el.lanceolis@sh
-| .ed
-|         A
-| Jean-Pa
-| l    555-2127     jeanpa
-| l.campanor
-| m@ny
-| .ed
-|      R
-|

Bill         555-1675     bill.drowning@hotmail.com       A

awk '{ print $0 }' RS="u" mail-list

$ echo record 1 AAAA record 2 BBBB record 3 |
> gawk 'BEGIN { RS = "\n|( *[[:upper:]]+ *)" }
>             { print "Record =", $0,"and RT = [" RT "]" }'

-| Record = record 1 and RT = [ AAAA ]
-| Record = record 2 and RT = [ BBBB ]
-| Record = record 3 and RT = [
-| ]

BEGIN { RS = "\0" }  # whole file becomes one record?

This seems like a pretty nice example.

$ awk '$1 ~ /li/ { print $0 }' mail-list
-| Amelia       555-5553     amelia.zodiacusque@gmail.com    F
-| Julie        555-6699     julie.perscrutabor@skeeve.com   F

$ awk '/li/ { print $1, $NF }' mail-list
-| Amelia F
-| Broderick R
-| Julie F
-| Samuel A

awk '{ print $NR }'

awk '{ print $(2*2) }' mail-list

$ awk '{ nboxes = $3 ; $3 = $3 - 10
>        print nboxes, $3 }' inventory-shipped
-| 25 15
-| 32 22
-| 24 14
…

$ awk '{ $2 = $2 - 10; print $0 }' inventory-shipped
-| Jan 3 25 15 115
-| Feb 5 32 24 226
-| Mar 5 24 34 228
…

$ awk '{ $6 = ($5 + $4 + $3 + $2)
>        print $6 }' inventory-shipped
-| 168
-| 297
-| 301
…

if ($(NF+1) != "")
    print "can't happen"
else
    print "everything is normal"

$ echo a b c d | awk '{ OFS = ":"; $2 = ""
>                       print $0; print NF }'
-| a::c:d
-| 4

$ echo a b c d | awk '{ OFS = ":"; $2 = ""; $6 = "new"
>                       print $0; print NF }'
-| a::c:d::new
-| 6

$ echo a b c d e f | awk '{ print "NF =", NF;
>                           NF = 3; print $0 }'
-| NF = 6
-| a b c

$1 = $1   # force record to be reconstituted
print $0  # or whatever else with $0

moo goo gai pan

awk 'BEGIN { FS = "," } ; { print $2 }'

John Q. Smith, 29 Oak St., Walamazoo, MI 42139

John Q. Smith, LXIX, 29 Oak St., Walamazoo, MI 42139

FS = ", \t"

$ echo ' a b c d ' | awk '{ print $2 }'
-| b

$ echo ' a  b  c  d ' | awk 'BEGIN { FS = "[ \t\n]+" }
>                                  { print $2 }'
-| a

$ echo '   a b c d' | awk '{ print; $2 = $2; print }'
-|    a b c d
-| a b c d

$ echo 'xxAA  xxBxx  C' |
> gawk -F '(^x+)|( +)' '{ for (i = 1; i <= NF; i++)
>                             printf "-->%s<--\n", $i }'
-| --><--
-| -->AA<--
-| -->xxBxx<--
-| -->C<--

$ echo a b | gawk 'BEGIN { FS = "" }
>                  {
>                      for (i = 1; i <= NF; i = i + 1)
>                          print "Field", i, "is", $i
>                  }'
-| Field 1 is a
-| Field 2 is
-| Field 3 is b

awk -F, 'program' input-files

# same as FS = "\\"
awk -F\\\\ '…' files …

/edu/   { print $1 }

$ awk -F- -f edu.awk mail-list
-| Fabius       555
-| Samuel       555
-| Jean

Jean-Paul    555-2127     jeanpaul.campanorum@nyu.edu     R

arnold:x:2076:10:Arnold Robbins:/home/arnold:/bin/bash

awk -F: '$5 == ""' /etc/passwd

awk -F'\n' 'program' files …

sed 1q /etc/passwd | awk '{ FS = ":" ; print $1 }'

root

root:x:0:0:Root:/:

FS = "c"
IGNORECASE = 1
$0 = "aCa"
print $1

 10:06pm  up 21 days, 14:04,  23 users
User     tty       login  idle   JCPU   PCPU  what
hzuo     ttyV0     8:58pm            9      5  vi p24.tex
hzang    ttyV3     6:37pm    50                -csh
eklye    ttyV5     9:53pm            7      1  em thes.tex
dportein ttyV6     8:17pm  1:47                -csh
gierd    ttyD3    10:00pm     1                elm
dave     ttyD4     9:47pm            4      4  w
brent    ttyp0    26Jun91  4:46  26:46   4:41  bash
dave     ttyq4    26Jun9115days     46     46  wnewmail

# Print the first two columns and a numeric idle value for each line of
# fixed-width input.  Fields are cut by column width (FIELDWIDTHS), not
# by whitespace, so blank columns still occupy a field.
BEGIN  { FIELDWIDTHS = "9 6 10 6 7 7 35" }   # seven fixed-width columns
# The first two input lines are skipped; every later line is processed.
NR > 2 {
    idle = $4
    sub(/^ +/, "", idle)   # strip leading spaces
    if (idle == "")        # column was entirely blank
        idle = 0
    if (idle ~ /:/) {      # hh:mm
        split(idle, t, ":")
        idle = t[1] * 60 + t[2]
    }
    # NOTE(review): a value containing "days" is scaled by 24 * 60 * 60,
    # whereas the colon branch above scales its first part by only 60, so
    # the two results look like they are in different units -- confirm
    # which unit is intended.
    if (idle ~ /days/)
        idle *= 24 * 60 * 60
    print $1, $2, idle
}

hzuo      ttyV0  0
hzang     ttyV3  50
eklye     ttyV5  0
dportein  ttyV6  107
gierd     ttyD3  1
dave      ttyD4  0
brent     ttyp0  286
dave      ttyq4  1296000

BEGIN  { FIELDWIDTHS = "8 1:5 4:7 6 1:6 1:6 2:33" }

hzuo     ttyV0 0
hzang    ttyV3 50
eklye    ttyV5 0
dportein ttyV6 107
gierd    ttyD3 1
dave     ttyD4 0
brent    ttyp0 286
dave     ttyq4 1296000

$ cat fw.awk                         Show the program
-| BEGIN { FIELDWIDTHS = "2 2 *" }
-| { print NF, $1, $2, $3 }
$ cat fw.in                          Show sample input
-| 1234abcdefghi
$ gawk -f fw.awk fw.in               Run the program
-| 3 12 34 abcdefghi

Robbins,Arnold,"1234 A Pretty Street, NE",MyTown,MyState,12345-6789,USA

FPAT = "([^,]+)|(\"[^\"]+\")"

BEGIN {
    FPAT = "([^,]+)|(\"[^\"]+\")"
}

{
    print "NF = ", NF
    for (i = 1; i <= NF; i++) {
        printf("$%d = <%s>\n", i, $i)
    }
}

$ gawk -f simple-csv.awk addresses.csv
NF =  7
$1 = <Robbins>
$2 = <Arnold>
$3 = <"1234 A Pretty Street, NE">
$4 = <MyTown>
$5 = <MyState>
$6 = <12345-6789>
$7 = <USA>

if (substr($i, 1, 1) == "\"") {
    len = length($i)
    $i = substr($i, 2, len - 2)    # Get text within the two quotes
}

FPAT = "([^,]*)|(\"[^\"]+\")"

p,"q,r",s
p,"q""r",s
p,"q,""r",s
p,"",s
p,,s

# Choose one of three candidate FPAT definitions according to the
# variable `fpat', so that the patterns can be compared on the same
# input.  `fpat' is not assigned in this program; it is expected to be
# set from outside (for example with -v fpat=N).
BEGIN {
     # 0: one or more non-comma characters, or a double-quoted run that
     #    contains no double quotes
     fp[0] = "([^,]+)|(\"[^\"]+\")"
     # 1: as 0, but the unquoted alternative may match zero characters
     fp[1] = "([^,]*)|(\"[^\"]+\")"
     # 2: as 1, but a quoted field may also contain doubled quotes ("")
     fp[2] = "([^,]*)|(\"([^\"]|\"\")+\")"
     FPAT = fp[fpat+0]   # the +0 makes an unset fpat select fp[0]
}

# For every record, print the record itself in angle brackets, then on
# one line the field count followed by each field in angle brackets.
{
     print "<" $0 ">"
     printf("NF = %s ", NF)
     for (i = 1; i <= NF; i++) {
         printf("<%s>", $i)
     }
     print ""            # end the line of fields
}

$ gawk -v fpat=2 -f test-csv.awk sample.csv
-| <p,"q,r",s>
-| NF = 3 <p><"q,r"><s>
-| <p,"q""r",s>
-| NF = 3 <p><"q""r"><s>
-| <p,"q,""r",s>
-| NF = 3 <p><"q,""r"><s>
-| <p,"",s>
-| NF = 3 <p><""><s>
-| <p,,s>
-| NF = 3 <p><><s>

if (PROCINFO["FS"] == "FS")
    regular field splitting …
else if (PROCINFO["FS"] == "FIELDWIDTHS")
    fixed-width field splitting …
else if (PROCINFO["FS"] == "FPAT")
    content-based field splitting …
else
    API input parser field splitting … (advanced feature)

Jane Doe
123 Main Street
Anywhere, SE 12345-6789

John Smith
456 Tree-lined Avenue
Smallville, MW 98765-4321
…

# addrs.awk --- simple mailing list program
# Records are separated by blank lines.
# Each line is one field.
BEGIN { RS = "" ; FS = "\n" }
{
      print "Name is:", $1
      print "Address is:", $2
      print "City and State are:", $3
      print ""
}

$ awk -f addrs.awk addresses
-| Name is: Jane Doe
-| Address is: 123 Main Street
-| City and State are: Anywhere, SE 12345-6789
-|
-| Name is: John Smith
-| Address is: 456 Tree-lined Avenue
-| City and State are: Smallville, MW 98765-4321
-|
…

# Remove text between /* and */, inclusive
{
    while ((start = index($0, "/*")) != 0) {
        out = substr($0, 1, start - 1)  # leading part of the string
        rest = substr($0, start + 2)    # ... */ ...
        while ((end = index(rest, "*/")) == 0) {  # is */ in trailing part?
            # get more text
            if (getline <= 0) {
                print("unexpected EOF or error:", ERRNO) > "/dev/stderr"
                exit
            }
            # build up the line using string concatenation
            rest = rest $0
        }
        rest = substr(rest, end + 2)  # remove comment
        # build up the output line using string concatenation
        $0 = out rest
    }
    print $0
}

mon/*comment*/key
rab/*commen
t*/bit
horse /*comment*/more text
part 1 /*comment*/part 2 /*comment*/part 3
no comment

$ awk -f strip_comments.awk example_text
-| monkey
-| rabbit
-| horse more text
-| part 1 part 2 part 3
-| no comment

{
     if ((getline tmp) > 0) {
          print tmp
          print $0
     } else
          print $0
}

wan
tew
free
phore

tew
wan
phore
free

{
    if ($1 == 10) {
         getline < "secondary.input"
         print
    } else
         print
}

{
     if (NF == 2 && $1 == "@include") {
          while ((getline line < $2) > 0)
               print line
          close($2)
     } else
          print
}

{
     if ($1 == "@execute") {
          tmp = substr($0, 10)        # Remove "@execute"
          while ((tmp | getline) > 0)
               print
          close(tmp)
     } else
          print
}

foo
bar
baz
@execute who
bletch

foo
bar
baz
arnold     ttyv0   Jul 13 14:22
miriam     ttyp0   Jul 13 14:23     (murphy:0)
bill       ttyp1   Jul 13 14:23     (murphy:0)
bletch

BEGIN {
     "date" | getline current_time
     close("date")
     print "Report printed on " current_time
}

print "some query" |& "db_server"
"db_server" |& getline

BEGIN {
    system("echo 1 > f")
    while ((getline a[++c] < "f") > 0) { }
    print c
}

PROCINFO["input_name", "READ_TIMEOUT"] = timeout in milliseconds

Service = "/inet/tcp/0/localhost/daytime"
PROCINFO[Service, "READ_TIMEOUT"] = 100
if ((Service |& getline) > 0)
    print $0
else if (ERRNO != "")
    print ERRNO

PROCINFO["/dev/stdin", "READ_TIMEOUT"] = 5000
while ((getline < "/dev/stdin") > 0)
    print $0

$ gawk 'BEGIN { PROCINFO["-", "READ_TIMEOUT"] = 5000 }
> { print "You entered: " $0 }'
gawk
-| You entered: gawk

error→ gawk: cmd. line:2: (FILENAME=- FNR=1) fatal: error reading input file `-': Connection timed out

PROCINFO[Service, "READ_TIMEOUT"] = 1000
while ((Service |& getline) > 0) {
    print $0
    PROCINFO[Service, "READ_TIMEOUT"] -= 100
}

PROCINFO["input_name", "RETRY"] = 1

$ gawk -f whizprog.awk *        Directories could kill this program

print item1, item2, …

$ awk 'BEGIN { print "line one\nline two\nline three" }'
-| line one
-| line two
-| line three

$ awk '{ print $1, $2 }' inventory-shipped
-| Jan 13
-| Feb 15
-| Mar 15
…

$ awk '{ print $1 $2 }' inventory-shipped
-| Jan13
-| Feb15
-| Mar15
…

awk 'BEGIN {  print "Month Crates"
              print "----- ------" }
           {  print $1, $2 }' inventory-shipped

Month Crates
----- ------
Jan 13
Feb 15
Mar 15
…

awk 'BEGIN { print "Month Crates"
             print "----- ------" }
           { print $1, "     ", $2 }' inventory-shipped

$ awk 'BEGIN { OFS = ";"; ORS = "\n\n" }
>            { print $1, $2 }' mail-list
-| Amelia;555-5553
-|
-| Anthony;555-3412
-|
-| Becky;555-7685
-|
-| Bill;555-1675
-|
-| Broderick;555-0542
-|
-| Camilla;555-2912
-|
-| Fabius;555-1234
-|
-| Julie;555-6699
-|
-| Martin;555-6480
-|
-| Samuel;555-3430
-|
-| Jean-Paul;555-2127
-|

$ awk 'BEGIN {
>   OFMT = "%.0f"  # print numbers as integers (rounds)
>   print 17.23, 17.54 }'
-| 17 18

printf format, item1, item2, …

$ awk 'BEGIN {
>    ORS = "\nOUCH!\n"; OFS = "+"
>    msg = "Don\47t Panic!"
>    printf "%s\n", msg
> }'
-| Don't Panic!

printf "%4.3e\n", 1950

printf "%4.3f", 1950

printf "%s %s\n", "don't", "panic"
printf "%2$s %1$s\n", "panic", "don't"

printf "%-4s", "foo"

$ cat thousands.awk          Show source program
-| BEGIN { printf "%'d\n", 1234567 }
$ LC_ALL=C gawk -f thousands.awk
-| 1234567                   Results in "C" locale
$ LC_ALL=en_US.UTF-8 gawk -f thousands.awk
-| 1,234,567                 Results in US English UTF locale

printf "%4s", "foo"

printf "%4s", "foobar"

printf "%.4s", "foobar"

w = 5
p = 3
s = "abcdefg"
printf "%*.*s\n", w, p, s

s = "abcdefg"
printf "%5.3s\n", s

w = 5
p = 3
s = "abcdefg"
printf "%" w "." p "s\n", s

awk '{ printf "%-10s %s\n", $1, $2 }' mail-list

$ awk '{ printf "%-10s %s\n", $1, $2 }' mail-list
-| Amelia     555-5553
-| Anthony    555-3412
-| Becky      555-7685
-| Bill       555-1675
-| Broderick  555-0542
-| Camilla    555-2912
-| Fabius     555-1234
-| Julie      555-6699
-| Martin     555-6480
-| Samuel     555-3430
-| Jean-Paul  555-2127

awk 'BEGIN { print "Name      Number"
             print "----      ------" }
           { printf "%-10s %s\n", $1, $2 }' mail-list

awk 'BEGIN { printf "%-10s %s\n", "Name", "Number"
             printf "%-10s %s\n", "----", "------" }
           { printf "%-10s %s\n", $1, $2 }' mail-list

awk 'BEGIN { format = "%-10s %s\n"
             printf format, "Name", "Number"
             printf format, "----", "------" }
           { printf format, $1, $2 }' mail-list

$ awk '{ print $2 > "phone-list"
>        print $1 > "name-list" }' mail-list
$ cat phone-list
-| 555-5553
-| 555-3412
…
$ cat name-list
-| Amelia
-| Anthony
…

awk '{ print $1 > "names.unsorted"
       command = "sort -r > names.sorted"
       print $1 | command }' mail-list

report = "mail bug-system"
print("Awk script failed:", $0) | report
print("at record number", FNR, "of", FILENAME) | report
close(report)

# clear the file
print "Don't panic" > "guide.txt"
…
# append
print "Avoid improbability generators" >> "guide.txt"

{ printf("mv %s %s\n", $0, tolower($0)) | "sh" }
END { close("sh") }

print "Serious error detected!" | "cat 1>&2"

print "Serious error detected!" > "/dev/tty"

print "Serious error detected!" > "/dev/stderr"

/net-type/protocol/local-port/remote-host/remote-port

close(filename)

close(command)

"sort -r names" | getline foo

close("sort -r names")

sortcom = "sort -r names"
sortcom | getline foo

…
close(sortcom)

{
    …
    command = ("grep " $1 " /some/file | my_prog -q " $3)
    while ((command | getline) > 0) {
        process output of command
    }
    # need close(command) here
}

command = "…"
command | getline info
retval = close(command)  # syntax error in many Unix awks

$ gawk 'BEGIN { print "hi" > "/no/such/file" }'
error→ gawk: cmd. line:1: fatal: can't redirect to `/no/such/file' (No
error→ such file or directory)

$ gawk '
> BEGIN {
>     PROCINFO["NONFATAL"] = 1
>     ERRNO = 0
>     print "hi" > "/no/such/file"
>     if (ERRNO) {
>         print("Output failed:", ERRNO) > "/dev/stderr"
>         exit 1
>     }
> }'
error→ Output failed: No such file or directory

awk 'BEGIN { print "Month Crates"
             print "----- ------" }
           { print $1, "     ", $2 }' inventory-shipped

BEGIN { print "Serious error detected!" > /dev/stderr }

105
1.05e+2
1050e-1

"parrot"

#include <stdio.h>
int main()
{
    printf("hello, \
world\n");
    return 0;
}

$ gawk 'BEGIN { print "hello, 
> world" }'
-| gawk: cmd. line:1: BEGIN { print "hello,
-| gawk: cmd. line:1:               ^ unterminated string
-| gawk: cmd. line:1: BEGIN { print "hello,
-| gawk: cmd. line:1:               ^ syntax error

$ gawk 'BEGIN { print "hello, \
> world" }'
-| hello, world

$ nawk 'BEGIN { print "hello, \
> world" }'
-| hello,
-| world

$ gawk 'BEGIN { printf "%d, %d, %d\n", 011, 11, 0x11 }'
-| 9, 11, 17

$ gawk 'BEGIN { print "021 is", 021 ; print 018 }'
-| 021 is 17
-| 18

$ gawk 'BEGIN { printf "0x11 is <%s>\n", 0x11 }'
-| 0x11 is <17>

if ($0 ~ /barfly/ || $0 ~ /camelot/)
    print "found"

if (/barfly/ || /camelot/)
    print "found"

# Note that /foo/ is on the left of the ~
if (/foo/ ~ $1) print "found foo"

matches = /foo/

# mysub --- substitute repl for text in str that matches pat
#
# Parameters:
#   pat     the pattern handed to sub() or gsub(); a plain /re/ constant
#           used as an argument is evaluated as a match against $0, so
#           pass a string or a strongly typed regexp (@/re/) instead
#   repl    the replacement text
#   str     the string to work on
#   global  if true, use gsub() (replace every match); otherwise use
#           sub() (replace only the first match)
#
# Returns:
#   the modified copy of str; callers need to use this return value
function mysub(pat, repl, str, global)
{
    if (global)
        gsub(pat, repl, str)
    else
        sub(pat, repl, str)
    return str
}

{
    …
    text = "hi! hi yourself!"
    mysub(/hi/, "howdy", text, 1)
    …
}

num = 42        Numeric variable
str = "hi"      String variable
re = /foo/      Wrong! re is the result of $0 ~ /foo/

re = @/foo/     Regexp variable

gawk -v pattern='@/something(interesting)+/' …

re = @/don't panic/
sub(/don't/, "do", re)
print typeof(re), re

variable=text

-v variable=text

awk '{ print $n }' n=4 inventory-shipped n=2 mail-list

$ awk '{ print $n }' n=4 inventory-shipped n=2 mail-list
-| 15
-| 24
…
-| 555-5553
-| 555-3412
…

gawk -v 're1=@/foo|bar/' '…' /path/to/file1 're2=@/baz|quux/' /path/to/file2

two = 2; three = 3
print (two three) + 4

CONVFMT = "%2.2f"
a = 12
b = a ""

$ export POSIXLY_CORRECT=1                        Force POSIX behavior
$ gawk 'BEGIN { printf "%g\n", 3.1415927 }'
-| 3.14159
$ LC_ALL=en_DK.utf-8 gawk 'BEGIN { printf "%g\n", 3.1415927 }'
-| 3,14159
$ echo 4,321 | gawk '{ print $1 + 1 }'
-| 5
$ echo 4,321 | LC_ALL=en_DK.utf-8 gawk '{ print $1 + 1 }'
-| 5,321

Pat   100 97 58
Sandy  84 72 93
Chris  72 92 89

$ awk '{ sum = $2 + $3 + $4 ; avg = sum / 3
>        print $1, avg }' grades
-| Pat 85
-| Sandy 83
-| Chris 84.3333

b * int(a / b) + (a % b) == a

-17 % 8 = -1

$ awk '{ print "Field number one: " $1 }' mail-list
-| Field number one: Amelia
-| Field number one: Anthony
…

$ awk '{ print "Field number one:" $1 }' mail-list
-| Field number one:Amelia
-| Field number one:Anthony
…

file = "file"
name = "name"
print "something meaningful" > file name

print "something meaningful" > (file name)

BEGIN {
    a = "don't"
    print (a " " (a = "panic"))
}

$ awk 'BEGIN { print -12 " " -24 }'
-| -12-24

$ awk 'BEGIN { print -12 " " (-24) }'
-| -12 -24

    −12 (" " − 24)
⇒ −12 (0 − 24)
⇒ −12 (−24)
⇒ −12−24

z = 1

thing = "food"
predicate = "good"
message = "this " thing " is " predicate

foo = 1
print foo

foo = "bar"
print foo

foo = "a string"
foo = foo + 5

x = y = z = 5

foo += 5

foo = foo + 5

# Thanks to Pat Rankin for this example
BEGIN  {
    foo[rand()] += 5
    for (x in foo)
       print x, foo[x]

    bar[rand()] = bar[rand()] + 5
    for (x in bar)
       print x, bar[x]
}

i = 1
a[i += 2] = i + 1

$ awk /==/ /dev/null
error→ awk: syntax error at source line 1
error→  context is
error→         >>> /= <<<
error→ awk: bailing out at source line 1

awk '/[=]=/' /dev/null

b = 6
print b += b++

b = 6
b += ++b + b++
print b

BEGIN {
   if (3.1415927)
       print "A strange truth value"
   if ("Four Score And Seven Years Ago")
       print "A strange truth value"
   if (j = 57)
       print "A strange truth value"
}

$ gawk 'BEGIN { print (a == "" && a == 0 ?
> "a is untyped" : "a has a type!") ; print typeof(a) }'
-| a is untyped
-| unassigned

$ gawk 'BEGIN { a = 42 ; print typeof(a)
> b = a ; print typeof(b) }'
number
number

$ gawk 'BEGIN { a = "forty two" ; print typeof(a)
> b = a ; print typeof(b) }'
string
string

echo hello | awk '{ printf("%s %s < 42\n", $1,
                           ($1 < 42 ? "is" : "is not")) }'

$ echo hello | awk '{ printf("%s %s < 42\n", $1,
>                            ($1 < 42 ? "is" : "is not")) }'
-| hello is not < 42

$ echo 37 | awk '{ printf("%s %s < 42\n", $1,
>                         ($1 < 42 ? "is" : "is not")) }'
-| 37 is < 42

BEGIN {
     a = 12.345
     b = a " is a cute number"
     print b
}

        +----------------------------------------------
        |       STRING          NUMERIC         STRNUM
--------+----------------------------------------------
        |
STRING  |       string          string          string
        |
NUMERIC |       string          numeric         numeric
        |
STRNUM  |       string          numeric         numeric
--------+----------------------------------------------

$ echo ' +3.14' | awk '{ print($0 == " +3.14") }'    True
-| 1
$ echo ' +3.14' | awk '{ print($0 == "+3.14") }'     False
-| 0
$ echo ' +3.14' | awk '{ print($0 == "3.14") }'      False
-| 0
$ echo ' +3.14' | awk '{ print($0 == 3.14) }'        True
-| 1
$ echo ' +3.14' | awk '{ print($1 == " +3.14") }'    False
-| 0
$ echo ' +3.14' | awk '{ print($1 == "+3.14") }'     True
-| 1
$ echo ' +3.14' | awk '{ print($1 == "3.14") }'      False
-| 0
$ echo ' +3.14' | awk '{ print($1 == 3.14) }'        True
-| 1

$ echo hello 37 | gawk '{ print typeof($1), typeof($2) }'
-| string strnum

if (a = b)   # oops! should be a == b
   …
else
   …

$ echo 1e2 3 | awk '{ print ($1 < $2) ? "true" : "false" }'
-| false

x == "foo"

x ~ /foo/

$0 ~ /regexp/

$ gawk 'BEGIN { printf("ABC < abc = %s\n",
>                     ("ABC" < "abc" ? "TRUE" : "FALSE")) }'
-| ABC < abc = TRUE
$ gawk --posix 'BEGIN { printf("ABC < abc = %s\n",
>                             ("ABC" < "abc" ? "TRUE" : "FALSE")) }'
-| ABC < abc = FALSE

a <= b && a >= b

if ($0 ~ /edu/ && $0 ~ /li/) print

if ($0 ~ /edu/ || $0 ~ /li/) print

BEGIN { if (! ("HOME" in ENVIRON))
            print "no home!" }

$1 == "START"   { interested = ! interested; next }
interested      { print }
$1 == "END"     { interested = ! interested; next }

if (! some condition || some other condition) {
    … do whatever processing …
}

selector ? if-true-exp : if-false-exp

x >= 0 ? x : -x

x == y ? a[i++] : b[i++]

sqrt(x^2 + y^2)        one argument
atan2(y, x)            two arguments
rand()                 no arguments

sqrt(argument)

$ awk '{ print "The square root of", $1, "is", sqrt($1) }'
1
-| The square root of 1 is 1
3
-| The square root of 3 is 1.73205
5
-| The square root of 5 is 2.23607
Ctrl-d

{
    if (match($1, $2))
        print RSTART, RLENGTH
    else
        print "no match"
}

$ awk -f matchit.awk
aaccdd  c+
-| 3 2
foo     bar
-| no match
abcdefg e
-| 5 1

/foo|bar|baz/  { buzzwords++ }
END            { print buzzwords, "buzzwords seen" }

$ awk '$1 == "li" { print $2 }' mail-list

$ awk '$1 ~ /li/ { print $2 }' mail-list
-| 555-5553
-| 555-6699

$ awk '/edu/ && /li/' mail-list
-| Samuel       555-3430     samuel.lanceolis@shu.edu        A

$ awk '/edu/ || /li/' mail-list
-| Amelia       555-5553     amelia.zodiacusque@gmail.com    F
-| Broderick    555-0542     broderick.aliquotiens@yahoo.com R
-| Fabius       555-1234     fabius.undevicesimus@ucb.edu    F
-| Julie        555-6699     julie.perscrutabor@skeeve.com   F
-| Samuel       555-3430     samuel.lanceolis@shu.edu        A
-| Jean-Paul    555-2127     jeanpaul.campanorum@nyu.edu     R

$ awk '! /li/' mail-list
-| Anthony      555-3412     anthony.asserturo@hotmail.com   A
-| Becky        555-7685     becky.algebrarum@gmail.com      A
-| Bill         555-1675     bill.drowning@hotmail.com       A
-| Camilla      555-2912     camilla.infusarum@skynet.be     R
-| Fabius       555-1234     fabius.undevicesimus@ucb.edu    F
-| Martin       555-6480     martin.codicibus@hotmail.com    A
-| Jean-Paul    555-2127     jeanpaul.campanorum@nyu.edu     R

awk '$1 == "on", $1 == "off"' myfile

/^%$/,/^%$/    { next }
               { print }

/^%$/     { skip = ! skip; next }
skip == 1 { next } # skip lines with `skip' set

echo Yes | awk '/1/,/2/ || /Yes/'

$ echo Yes | gawk '(/1/,/2/) || /Yes/'
error→ gawk: cmd. line:1: (/1/,/2/) || /Yes/
error→ gawk: cmd. line:1:           ^ syntax error

$ awk '
> BEGIN { print "Analysis of \"li\"" }
> /li/  { ++n }
> END   { print "\"li\" appears in", n, "records." }' mail-list
-| Analysis of "li"
-| "li" appears in 4 records.

awk '{ print $1 }' mail-list

printf "Enter search pattern: "
read pattern
awk "/$pattern/ "'{ nmatches++ }
     END { print nmatches, "found" }' /path/to/data

printf "Enter search pattern: "
read pattern
awk -v pat="$pattern" '$0 ~ pat { nmatches++ }
       END { print nmatches, "found" }' /path/to/data

[pattern]  { action }
 pattern  [{ action }]
…
function name(args) { … }
…

/foo/  { }     match foo, do nothing --- empty action
/foo/          match foo, print the record --- omitted action

if (condition) then-body [else else-body]

if (x % 2 == 0)
    print "x is even"
else
    print "x is odd"

if (x % 2 == 0) print "x is even"; else
        print "x is odd"

while (condition)
  body

awk '
{
    i = 1
    while (i <= 3) {
        print $i
        i++
    }
}' inventory-shipped

do
  body
while (condition)

while (condition)
    body

{
    i = 1
    do {
        print $0
        i++
    } while (i <= 10)
}

for (initialization; condition; increment)
  body

awk '
{
    for (i = 1; i <= 3; i++)
        print $i
}' inventory-shipped

for (i = 0, j = length(a); i < j; i++, j--) …   C/C++, not awk!

for (i = 1; i <= 100; i *= 2)
    print i

initialization
while (condition) {
  body
  increment
}

for (i in array)
    do something with array[i]

switch (expression) {
case value or regular expression:
    case-body
default:
    default-body
}

while ((c = getopt(ARGC, ARGV, "aksx")) != -1) {
    switch (c) {
    case "a":
        # report size of all files
        all_files = TRUE;
        break
    case "k":
        BLOCK_SIZE = 1024       # 1K block size
        break
    case "s":
        # do sums only
        sum_only = TRUE
        break
    case "x":
        # don't cross filesystems
        fts_flags = or(fts_flags, FTS_XDEV)
        break
    case "?":
    default:
        usage()
        break
    }
}

# find smallest divisor of num
# Reads one candidate per record from $1.  Values that are not integers
# greater than 1 are rejected with a message on standard error.
{
    num = $1
    if (num < 2 || num != int(num))
        printf "%s: not an integer greater than 1\n", $1 > "/dev/stderr"
    else {
        for (divisor = 2; divisor * divisor <= num; divisor++) {
            if (num % divisor == 0)
                break
        }

        # The loop ends early only when a divisor was found.  Testing the
        # loop bound (rather than num % divisor) keeps 2 from being
        # reported as its own divisor when the loop body never ran.
        if (divisor * divisor <= num)
            printf "Smallest divisor of %d is %d\n", num, divisor
        else
            printf "%d is prime\n", num
    }
}

# find smallest divisor of num
# Reads one candidate per record from $1.  Values that are not integers
# greater than 1 are rejected with a message on standard error.
{
    num = $1
    if (num < 2 || num != int(num))
        printf "%s: not an integer greater than 1\n", $1 > "/dev/stderr"
    else {
        for (divisor = 2; ; divisor++) {
            # Check the bound first: once divisor exceeds sqrt(num) no
            # divisor exists, and 2 must not be reported as dividing 2.
            if (divisor * divisor > num) {
                printf "%d is prime\n", num
                break
            }
            if (num % divisor == 0) {
                printf "Smallest divisor of %d is %d\n", num, divisor
                break
            }
        }
    }
}

BEGIN {
     for (x = 0; x <= 20; x++) {
         if (x == 5)
             continue
         printf "%d ", x
     }
     print ""
}

# Attempt to print 0 through 20, skipping 5, with a while loop.
# NOTE(review): this never terminates.  When x == 5 the continue jumps
# back to the loop test before x++ runs, so x stays at 5 and the final
# print is never reached.  It reads like a deliberate demonstration of
# continue inside a while loop; confirm before "fixing" it.
BEGIN {
     x = 0
     while (x <= 20) {
         if (x == 5)
             continue    # skips the increment below as well
         printf "%d ", x
         x++
     }
     print ""
}

NF != 4 {
    printf("%s:%d: skipped: NF != 4\n", FILENAME, FNR) > "/dev/stderr"
    next
}

exit [return code]

BEGIN {
    if (("date" | getline date_now) <= 0) {
        print "Can't get system date" > "/dev/stderr"
        exit 1
    }

    print "current date is", date_now
    close("date")
}

awk -F, 'program' input-files

$ awk 'BEGIN {
>         for (i = 0; i < ARGC; i++)
>             print ARGV[i]
>      }' inventory-shipped mail-list
-| awk
-| inventory-shipped
-| mail-list

gawk '
BEGIN {
        for (i = 0; i < length(PROCINFO["argv"]); i++)
                print i, PROCINFO["argv"][i]
}'

foo = 5
SYMTAB["foo"] = 4
print foo    # prints 4

SYMTAB["xxx"] = 5
print SYMTAB["xxx"]

# multiply --- scale the global variable named by `variable' by `amount'
# through SYMTAB, store the product back, and return the new value
function multiply(variable, amount)
{
    SYMTAB[variable] = SYMTAB[variable] * amount
    return SYMTAB[variable]
}

BEGIN {
    answer = 10.5
    multiply("answer", 4)
    print "The answer is", answer
}

$ gawk -f answer.awk
-| The answer is 42

$ echo '1
> 2
> 3
> 4' | awk 'NR == 2 { NR = 17 }
> { print NR }'
-| 1
-| 17
-| 18
-| 19

$ awk 'BEGIN {
>        for (i = 0; i < ARGC; i++)
>            print ARGV[i]
>      }' inventory-shipped mail-list
-| awk
-| inventory-shipped
-| mail-list

BEGIN {
    printf "A=%d, B=%d\n", A, B
    for (i = 0; i < ARGC; i++)
        printf "\tARGV[%d] = %s\n", i, ARGV[i]
}
END   { printf "A=%d, B=%d\n", A, B }

$ awk -v A=1 -f showargs.awk B=2 /dev/null
-| A=1, B=0
-|        ARGV[0] = awk
-|        ARGV[1] = B=2
-|        ARGV[2] = /dev/null
-| A=1, B=2

awk -f myprog.awk -- -v -q file1 file2 …

BEGIN {
    for (i = 1; i < ARGC; i++) {
        if (ARGV[i] == "-v")
            verbose = 1
        else if (ARGV[i] == "-q")
            debug = 1
        else if (ARGV[i] ~ /^-./) {
            e = sprintf("%s: unrecognized option -- %c",
                    ARGV[0], substr(ARGV[i], 2, 1))
            print e > "/dev/stderr"
        } else
            break
        delete ARGV[i]
    }
}

gawk -f myprog.awk -q -v file1 file2 …

array[index-expression]

# Check if "foo" exists in a:         Incorrect!
if (a["foo"] != "") …

indx in array

if (2 in frequencies)
    print "Subscript 2 is present."

if (frequencies[2] != "")
    print "Subscript 2 is present."

array[index-expression] = value

{
    if ($1 > max)
        max = $1
    arr[$1] = $0
}
END {
    for (x = 1; x <= max; x++)
        print arr[x]
}

5  I am the Five man
2  Who are you?  The new number two!
4  . . . And four on the floor
1  Who is number one?
3  I three you.

1  Who is number one?
2  Who are you?  The new number two!
3  I three you.
4  . . . And four on the floor
5  I am the Five man

END {
    for (x = 1; x <= max; x++)
        if (x in arr)
            print arr[x]
}

for (var in array)
    body

# Record a 1 for each word that is used at least once
{
    for (i = 1; i <= NF; i++)
        used[$i] = 1
}

# Find number of distinct words more than 10 characters long
END {
    for (x in used) {
        if (length(x) > 10) {
            ++num_long_words
            print x
        }
    }
    print num_long_words, "words longer than 10 characters"
}

BEGIN {
    a["here"] = "here"
    a["is"] = "is"
    a["a"] = "a"
    a["loop"] = "loop"
    for (i in a) {
        j++
        a[j] = j
        print i
    }
}

$ gawk -f loopcheck.awk
-| here
-| loop
-| a
-| is

$ nawk -f loopcheck.awk
-| loop
-| here
-| is
-| a
-| 1

$ gawk '
> BEGIN {
>    a[4] = 4
>    a[3] = 3
>    for (i in a)
>        print i, a[i]
> }'
-| 4 4
-| 3 3
$ gawk '
> BEGIN {
>    PROCINFO["sorted_in"] = "@ind_str_asc"
>    a[4] = 4
>    a[3] = 3
>    for (i in a)
>        print i, a[i]
> }'
-| 3 3
-| 4 4

…
if ("sorted_in" in PROCINFO)
    save_sorted = PROCINFO["sorted_in"]
PROCINFO["sorted_in"] = "@val_str_desc" # or whatever
…
if (save_sorted)
    PROCINFO["sorted_in"] = save_sorted

xyz = 12.153
data[xyz] = 1
CONVFMT = "%2.2f"
if (xyz in data)
    printf "%s is in data\n", xyz
else
    printf "%s is not in data\n", xyz

for (i = 1; i <= maxsub; i++)
    do something with array[i]

$ echo 'line 1
> line 2
> line 3' | awk '{ l[lines] = $0; ++lines }
> END {
>     for (i = lines - 1; i >= 0; i--)
>        print l[i]
> }'
-| line 3
-| line 2

{ l[lines++] = $0 }
END {
    for (i = lines - 1; i >= 0; i--)
       print l[i]
}

delete array[index-expression]

for (i in frequencies)
    delete frequencies[i]

delete foo[4]
if (4 in foo)
    print "This will never be printed"

foo[4] = ""
if (4 in foo)
  print "This is printed, even though foo[4] is empty"

delete array

split("", array)

a[1] = 3
delete a
a = 3

if ((subscript1, subscript2, …) in array)
    …

{
     if (max_nf < NF)
          max_nf = NF
     max_nr = NR
     for (x = 1; x <= NF; x++)
          vector[x, NR] = $x
}
END {
     for (x = 1; x <= max_nf; x++) {
          for (y = max_nr; y >= 1; --y)
               printf("%s ", vector[x, y])
          printf("\n")
     }
}

1 2 3 4 5 6
2 3 4 5 6 1
3 4 5 6 1 2
4 5 6 1 2 3

4 3 2 1
5 4 3 2
6 5 4 3
1 6 5 4
2 1 6 5
3 2 1 6

for (combined in array) {
    split(combined, separate, SUBSEP)
    …
}

split("1\034foo", separate, "\034")

a[1][1] = 1
a[1][2] = 2

a[1][3][1, "name"] = "barney"

a[4] = "An element in a jagged array"

delete a[4]
a[4][5][6][7] = "An element in a four-dimensional array"

delete a[4][5]
a[4][5] = "An element in subarray a[4]"

print length(a), length(a[1]), length(a[1][3])

2, 3, 1

for (i in array)
    for (j in array[i])
        print array[i][j]

for (i in array) {
    if (isarray(array[i])) {
        for (j in array[i]) {
            print array[i][j]
        }
    }
    else
        print array[i]
}

for (i in a) {
    for (j in a[i]) {
        if (j == 3) {
            for (k in a[i][j])
                print a[i][j][k]

        } else
            print a[i][j]
    }
}

$ gawk 'BEGIN { split("a b c d", b[1]); print b[1][1] }'
error→ gawk: cmd. line:1: fatal: split: second argument is not an array

$ gawk 'BEGIN { b[1][1] = ""; split("a b c d", b[1]); print b[1][1] }'
-| a

i = 4
j = sqrt(i++)

i = 5
j = atan2(++i, i *= 2)

# randint --- return a random integer r with 0 <= r < n
function randint(n)
{
    # rand() yields a value in [0, 1); scale it, then truncate
    return int(rand() * n)
}

# roll --- simulate one throw of an n-sided die; result is in 1..n
function roll(n)
{
    return int(n * rand()) + 1
}

# For every input record, throw three six-sided dice
# and report the total number of points.
{
    printf("%d points\n", roll(6) + roll(6) + roll(6))
}

a["last"] = "de"
a["first"] = "sac"
a["middle"] = "cul"

asort(a)

a[1] = "cul"
a[2] = "de"
a[3] = "sac"

a[1] = "first"
a[2] = "last"
a[3] = "middle"

$ gawk '
> BEGIN {
>      a = "abc def"
>      b = gensub(/(.+) (.+)/, "\\2 \\1", "g", a)
>      print b
> }'
-| def abc

$ echo a b c a b c |
> gawk '{ print gensub(/a/, "AA", 2) }'
-| a b c AA b c

{ gsub(/Britain/, "United Kingdom"); print }

$ awk 'BEGIN { print index("peanut", "an") }'
-| 3

$ gawk 'BEGIN { print length(x) ; x[1] = 1 }'
-| 0
error→ gawk: fatal: attempt to use scalar `x' as array
$ nawk 'BEGIN { print length(x) ; x[1] = 1 }'
-| 0

{
    if ($1 == "FIND")
        regex = $2
    else {
        where = match($0, regex)
        if (where != 0)
            print "Match of", regex, "found at", where, "in", $0
       }
}

FIND ru+n
My program runs
but not very quickly
FIND Melvin
JF+KM
This line is property of Reality Engineering Co.
Melvin was here.

Match of ru+n found at 12 in My program runs
Match of Melvin found at 1 in Melvin was here.

$ echo foooobazbarrrrr |
> gawk '{ match($0, /(fo+).+(bar*)/, arr)
>         print arr[1], arr[2] }'
-| foooo barrrrr

$ echo foooobazbarrrrr |
> gawk '{ match($0, /(fo+).+(bar*)/, arr)
>           print arr[1], arr[2]
>           print arr[1, "start"], arr[1, "length"]
>           print arr[2, "start"], arr[2, "length"]
> }'
-| foooo barrrrr
-| 1 5
-| 9 7

split("cul-de-sac", a, "-", seps)

a[1] = "cul"
a[2] = "de"
a[3] = "sac"

seps[1] = "-"
seps[2] = "-"

pival = sprintf("pi = %.2f (approx.)", 22/7)

$ echo 0x11 |
> gawk '{ printf "%d\n", strtonum($1) }'
-| 17

str = "water, water, everywhere"
sub(/at/, "ith", str)

{ sub(/candidate/, "& and his wife"); print }

$ awk 'BEGIN {
>         str = "daabaaa"
>         sub(/a+/, "C&C", str)
>         print str
> }'
-| dCaaCbaaa

{ sub(/\|/, "\\&"); print }

sub(/USA/, "United States", "the USA and Canada")

string = "abcdef"
# try to get "abCDEf", won't work
substr(string, 3, 3) = "CDE"

gsub(/xyz/, "pdq", substr($0, 5, 20))  # WRONG

string = "abcdef"
…
string = substr(string, 1, 2) "CDE" substr(string, 6)

$ echo abc | awk '{ gsub(/m*/, "X"); print }'
-| XaXbXcX

 You type         sub() sees          sub() generates
 ——–         ———-          —————
     \&              &            The matched text
    \\&             \&            A literal ‘&’
   \\\&             \&            A literal ‘&’
  \\\\&            \\&            A literal ‘\&’
 \\\\\&            \\&            A literal ‘\&’
\\\\\\&           \\\&            A literal ‘\\&’
    \\q             \q            A literal ‘\q’

 You type         sub() sees         sub() generates
 ——–         ———-         —————
\\\\\\&           \\\&            A literal ‘\&’
  \\\\&            \\&            A literal ‘\’, followed by the matched text
    \\&             \&            A literal ‘&’
    \\q             \q            A literal ‘\q’
   \\\\             \\            \\

 You type         sub() sees         sub() generates
 ——–         ———-         —————
\\\\\\&           \\\&            A literal ‘\&’
  \\\\&            \\&            A literal ‘\’, followed by the matched text
    \\&             \&            A literal ‘&’
    \\q             \q            A literal ‘\q’
   \\\\             \\            \

  You type          gensub() sees         gensub() generates
  ——–          ————-         ——————
      &                    &            The matched text
    \\&                   \&            A literal ‘&’
   \\\\                   \\            A literal ‘\’
  \\\\&                  \\&            A literal ‘\’, then the matched text
\\\\\\&                 \\\&            A literal ‘\&’
    \\q                   \q            A literal ‘q’

$ awk '{ print $1 + $2 }'
1 1
-| 2
2 3
-| 5
Ctrl-d

$ awk '{ print $1 + $2 }' | cat
1 1
2 3
Ctrl-d
-| 2
-| 5

END {
     system("date | mail -s 'awk run done' root")
}

while (more stuff to do)
    print command | "/bin/sh"
close("/bin/sh")

system("")   # flush output

BEGIN {
     print "first print"
     system("echo system echo")
     print "second print"
}

first print
system echo
second print

system echo
first print
second print

$ date '+Today is %A, %B %d, %Y.'
-| Today is Monday, September 22, 2014.

#! /bin/sh
#
# date --- approximate the POSIX 'date' command
#
# Usage: date [-u] [+format]
#   -u        report the time in UTC instead of the local time zone
#   +format   strftime-style format; the leading + is optional here
#
# Exit status is 1 when more than one operand is given, 0 otherwise.

# ${1-} expands to nothing when no argument was given, so the test is
# also safe when the script is run under "set -u".
case ${1-} in
-u)  TZ=UTC0     # use UTC
     export TZ
     shift ;;
esac
gawk 'BEGIN  {
    # default format, used when no operand is supplied
    format = PROCINFO["strftime"]
    exitval = 0
    if (ARGC > 2) {
        # too many operands: say so instead of failing silently
        print "usage: date [-u] [+format]" > "/dev/stderr"
        exitval = 1
    } else if (ARGC == 2) {
        format = ARGV[1]
        if (format ~ /^\+/)
            format = substr(format, 2)   # remove leading +
    }
    print strftime(format)
    exit exitval
}' "$@"

                Bit operator
          |  AND  |   OR  |  XOR
          |---+---+---+---+---+---
Operands  | 0 | 1 | 0 | 1 | 0 | 1
----------+---+---+---+---+---+---
    0     | 0   0 | 0   1 | 0   1
    1     | 0   1 | 1   1 | 1   0

# bits2str --- render an integer as a string of binary digits,
#              left-padded with zeros to a multiple of eight digits
#              (plain "0" is returned for a zero argument)
function bits2str(bits,        data, mask)
{
    if (bits == 0)
        return "0"

    # peel off the low-order bit until nothing is left
    mask = 1
    while (bits != 0) {
        if (and(bits, mask))
            data = "1" data
        else
            data = "0" data
        bits = rshift(bits, 1)
    }

    # pad on the left to a whole number of bytes
    for (; (length(data) % 8) != 0; )
        data = "0" data

    return data
}

BEGIN {
    printf "123 = %s\n", bits2str(123)
    printf "0123 = %s\n", bits2str(0123)
    printf "0x99 = %s\n", bits2str(0x99)
    comp = compl(0x99)
    printf "compl(0x99) = %#x = %s\n", comp, bits2str(comp)
    shift = lshift(0x99, 2)
    printf "lshift(0x99, 2) = %#x = %s\n", shift, bits2str(shift)
    shift = rshift(0x99, 2)
    printf "rshift(0x99, 2) = %#x = %s\n", shift, bits2str(shift)
}

$ gawk -f testbits.awk
-| 123 = 01111011
-| 0123 = 01010011
-| 0x99 = 10011001
-| compl(0x99) = 0x3fffffffffff66 =
-| 00111111111111111111111111111111111111111111111101100110
-| lshift(0x99, 2) = 0x264 = 0000001001100100
-| rshift(0x99, 2) = 0x26 = 00100110

$ gawk 'BEGIN { print compl(42) }'
-| 9007199254740949
$ gawk -M 'BEGIN { print compl(42) }'
-| -43

$ gawk 'BEGIN { printf "%#x\n", compl(42) }'
-| 0x1fffffffffffd5
$ gawk -M 'BEGIN { printf "%#x\n", compl(42) }'
-| 0xffffffffffffffd5

BEGIN {
    # creates a[1] but it has no assigned value
    a[1]
    print typeof(a[1])  # unassigned
}

BEGIN {
    print "initially, typeof(v) = ", typeof(v)
    if ("FOO" in ENVIRON)
        make_scalar(v)
    else
        make_array(v)
    print "typeof(v) =", typeof(v)
}
function make_scalar(p,    l) { l = p }
function make_array(p) { p[1] = 1 }

$ gawk 'BEGIN { print typeof(x) }'
-| untyped
$ gawk 'BEGIN { print typeof(x["foo"]) }'
-| untyped

$ gawk-5.1.1 'BEGIN { print typeof(x) }'
-| untyped
$ gawk-5.1.1 'BEGIN { print typeof(x["foo"]) }'
-| unassigned

function name([parameter-list])
{
     body-of-function
}

func foo() { a = sqrt($1) ; print a }

function myprint(num)
{
     printf "%6.3g\n", num
}

$3 > 0     { myprint($3) }

 1.2   3.4    5.6   7.8
 9.10 11.12 -13.14 15.16
17.18 19.20  21.22 23.24

   5.6
  21.2

# delarray --- remove every element from array a
function delarray(a,    i)
{
    # splitting the null string stores no elements, but it clears
    # the target array first
    split("", a)
}

# rev --- return str with its characters in reverse order
function rev(str)
{
    if (str == "")
        return ""
    # last character first, followed by the reversal of everything before it
    return (substr(str, length(str)) rev(substr(str, 1, length(str) - 1)))
}

$ echo "Don't Panic!" |
> gawk -e '{ print rev($0) }' -f rev.awk
-| !cinaP t'noD

# ctime.awk
#
# awk version of C ctime(3) function
#
# ts is a timestamp as used by strftime(); when it is zero or omitted
# the current time from systime() is formatted instead.
function ctime(ts,    format)
{
    if (ts == 0)
        ts = systime()       # no timestamp given: fall back to "now"
    format = "%a %b %e %H:%M:%S %Z %Y"
    return strftime(format, ts)
}

foo(x y, "lose", 4 * z)

function bar()
{
    for (i = 0; i < 3; i++)
        print "bar's i=" i
}
function foo(j)
{
    i = j + 1
    print "foo's i=" i
    bar()
    print "foo's i=" i
}
BEGIN {
      i = 10
      print "top's i=" i
      foo(0)
      print "top's i=" i
}

top's i=10
foo's i=1
bar's i=0
bar's i=1
bar's i=2
foo's i=3
top's i=3

function bar(    i)
{
    for (i = 0; i < 3; i++)
        print "bar's i=" i
}
function foo(j,    i)
{
    i = j + 1
    print "foo's i=" i
    bar()
    print "foo's i=" i
}
BEGIN {
      i = 10
      print "top's i=" i
      foo(0)
      print "top's i=" i
}

top's i=10
foo's i=1
bar's i=0
bar's i=1
bar's i=2
foo's i=1
top's i=10

function some_func(p1,      a)
{
    if (p1++ > 3)
        return

    a[p1] = p1
    some_func(p1)
    printf("At level %d, index %d %s found in a\n",
         p1, (p1 - 1), (p1 - 1) in a ? "is" : "is not")
    printf("At level %d, index %d %s found in a\n",
         p1, p1, p1 in a ? "is" : "is not")
    print ""
}
BEGIN {
    some_func(1)
}

At level 4, index 3 is not found in a
At level 4, index 4 is found in a
At level 3, index 2 is not found in a
At level 3, index 3 is found in a
At level 2, index 1 is not found in a
At level 2, index 2 is found in a

foo = "bar"
z = myfunc(foo)

function myfunc(str)
{
   print str
   str = "zzz"
   print str
}

function changeit(array, ind, nvalue)
{
     array[ind] = nvalue
}
BEGIN {
    a[1] = 1; a[2] = 2; a[3] = 3
    changeit(a, 2, "two")
    printf "a[1] = %s, a[2] = %s, a[3] = %s\n",
            a[1], a[2], a[3]
}

BEGIN {
    if (0)
        foo()
    else
        bar()
}
function bar() { … }
# note that `foo' is not defined

function foo(p1, p2)
{
    …
}
BEGIN {
    foo(1, 2, 3, 4)
}

return [expression]

# maxelt --- return the largest value stored in vec
#            (the null string when vec has no elements)
function maxelt(vec,   i, ret)
{
     for (i in vec) {
          # keep the current candidate unless this element beats it
          if (ret != "" && ! (vec[i] > ret))
               continue
          ret = vec[i]
     }
     return ret
}

# maxelt --- return the largest value stored in vec
#            (the null string when vec has no elements)
function maxelt(vec,   i, ret)
{
     for (i in vec)
          ret = (ret == "" || vec[i] > ret) ? vec[i] : ret
     return ret
}

# Store every field of every record in nums, keyed by (record, field).
{
     i = 1
     while (i <= NF) {
          nums[NR, i] = $i
          i++
     }
}

# After all input has been read, report the largest field seen.
END {
     print maxelt(nums)
}

 1 5 23 8 16
44 3 5 2 8 26
256 291 1396 2962 100
-6 467 998 1101
99385 11 0 225

function foo(a)
{
    a[1] = 1   # parameter is an array
}
BEGIN {
    b = 1
    foo(b)  # invalid: fatal type mismatch
    foo(x)  # x uninitialized, becomes an array dynamically
    x = 1   # now not allowed, runtime error
}

BEGIN {
    funky(a)
    if (A == 0)
        print "<" a ">"
    else
        print a[1]
}
function funky(arr)
{
    if (A == 0)
        arr = 1
    else
        arr[1] = 1
}

$ nawk -v A=0 -f funky.awk
error→ nawk: can't assign to a; it's an array name.
error→  source line number 11
$ nawk -v A=1 -f funky.awk
-| 1

$ gawk -v A=0 -f funky.awk
-| <>
$ gawk -v A=1 -f funky.awk 
-| 1

Biology_101 sum average data: 87.0 92.4 78.5 94.9
Chemistry_305 sum average data: 75.2 98.3 94.7 88.2
English_401 sum average data: 100.0 95.6 87.1 93.4

{
    class = $1
    for (i = 2; $i != "data:"; i++) {
        if ($i == "sum")
            sum()   # processes the whole record
        else if ($i == "average")
            average()
        …           # and so on
    }
}

the_function = "sum"
result = @the_function()   # calls the sum() function

# indirectcall.awk --- Demonstrate indirect function calls
# average --- return the arithmetic mean of fields $first through $last
#             of the current record
function average(first, last,   total, i)
{
    total = 0
    i = first
    while (i <= last) {
        total = total + $i
        i++
    }
    return total / (last - first + 1)
}
# sum --- add up fields $first through $last of the current record
#         and return the total (0 when the range is empty)
function sum(first, last,   total, i)
{
    total = 0
    i = first
    while (i <= last) {
        total = total + $i
        i++
    }
    return total
}

# For each record, print the class name and the requested statistics
#
# Record layout: class name, zero or more function names, the literal
# field "data:", then the values.  Each named function is called
# indirectly with the field range that holds the values.
{
    class_name = $1
    gsub(/_/, " ", class_name)  # Replace _ with spaces

    # find start; reset it first so that a value left over from the
    # previous record is never reused
    start = 0
    for (i = 1; i <= NF; i++) {
        if ($i == "data:") {
            start = i + 1
            break
        }
    }

    # Without the marker there is nothing to compute, and scanning the
    # fields for it would never stop: skip the record instead.
    if (start == 0) {
        printf("%s:%d: skipped: no \"data:\" field\n",
               FILENAME, FNR) > "/dev/stderr"
        next
    }

    printf("%s:\n", class_name)
    # fields 2 .. (marker - 1) name the functions to apply
    for (i = 2; i < start - 1; i++) {
        the_function = $i
        printf("\t%s: <%s>\n", $i, @the_function(start, NF) "")
    }
    print ""
}

$ gawk -f indirectcall.awk class_data1
-| Biology 101:
-|     sum: <352.8>
-|     average: <88.2>
-|
-| Chemistry 305:
-|     sum: <356.4>
-|     average: <89.1>
-|
-| English 401:
-|     sum: <376.1>
-|     average: <94.025>

# quicksort.awk --- Quicksort algorithm, with user-supplied
#                   comparison function
# quicksort --- sort data[left] .. data[right] in place.  less_than is
#               the name of a two-argument function, called indirectly,
#               that returns true when its first argument sorts before
#               its second.
function quicksort(data, left, right, less_than,    i, last)
{
    # ranges holding fewer than two elements are already sorted
    if (left < right) {
        # move the middle element to the front to serve as the pivot
        quicksort_swap(data, left, int((left + right) / 2))
        last = left
        i = left + 1
        while (i <= right) {
            if (@less_than(data[i], data[left]))
                quicksort_swap(data, ++last, i)
            i++
        }
        # drop the pivot between the two partitions, then sort each one
        quicksort_swap(data, left, last)
        quicksort(data, left, last - 1, less_than)
        quicksort(data, last + 1, right, less_than)
    }
}
# quicksort_swap --- exchange data[i] and data[j]; helper for quicksort
function quicksort_swap(data, i, j,      temp)
{
    temp = data[j]
    data[j] = data[i]
    data[i] = temp
}

# num_lt --- true when left is numerically smaller than right;
#            adding 0 forces both operands to be treated as numbers
function num_lt(left, right)
{
    return ((right + 0) > (left + 0))
}

# num_ge --- true when left is numerically greater than or equal to
#            right; adding 0 forces both operands to be numbers
function num_ge(left, right)
{
    return ((right + 0) <= (left + 0))
}

# do_sort --- copy fields $first .. $last of the current record into a
#             scratch array, sort them with quicksort() using the
#             comparison function named by `compare', and return the
#             sorted values joined by single spaces
function do_sort(first, last, compare,      data, i, retval)
{
    delete data

    # gather the fields; i ends up holding the element count
    i = 0
    while (first <= last)
        data[++i] = $(first++)

    quicksort(data, 1, i, compare)

    # stitch the sorted values back together
    retval = data[1]
    for (i = 2; i in data; i++)
        retval = retval " " data[i]
    return retval
}

# sort --- sort the data in ascending order and return it as a string
function sort(first, last)
{
    return do_sort(first, last, "num_lt")
}

# rsort --- sort the data in descending order and return it as a string
function rsort(first, last)
{
    return do_sort(first, last, "num_ge")
}

Biology_101 sum average sort rsort data: 87.0 92.4 78.5 94.9
Chemistry_305 sum average sort rsort data: 75.2 98.3 94.7 88.2
English_401 sum average sort rsort data: 100.0 95.6 87.1 93.4

$ gawk -f quicksort.awk -f indirectcall.awk class_data2
-| Biology 101:
-|     sum: <352.8>
-|     average: <88.2>
-|     sort: <78.5 87.0 92.4 94.9>
-|     rsort: <94.9 92.4 87.0 78.5>
-|
-| Chemistry 305:
-|     sum: <356.4>
-|     average: <89.1>
-|     sort: <75.2 88.2 94.7 98.3>
-|     rsort: <98.3 94.7 88.2 75.2>
-|
-| English 401:
-|     sum: <376.1>
-|     average: <94.025>
-|     sort: <87.1 93.4 95.6 100.0>
-|     rsort: <100.0 95.6 93.4 87.1>

for (i = 1; i <= n; i++)
    @the_function()

# ignore case
{ $0 = tolower($0) }

function lib_func(x, y,    l1, l2)
{
    …
    # some_var should be local but by oversight is not
    use variable some_var
    …
}

# mystrtonum --- convert string to number
#
# Recognizes C-style octal ("0123"), hexadecimal ("0x1F" or "0X1f") and
# decimal integer or floating-point text, the latter with an optional
# sign and an optional exponent ("25", ".31", "1.e3", "1e3", "1.5e-3").
# Returns the numeric value, or the string "NOT-A-NUMBER" when str has
# none of these forms.
function mystrtonum(str,        ret, n, i, k, c)
{
    if (str ~ /^0[0-7]*$/) {
        # octal
        n = length(str)
        ret = 0
        for (i = 1; i <= n; i++) {
            c = substr(str, i, 1)
            # index() returns 0 if c not in string,
            # includes c == "0"
            k = index("1234567", c)
            ret = ret * 8 + k
        }
    } else if (str ~ /^0[xX][[:xdigit:]]+$/) {
        # hexadecimal
        str = substr(str, 3)    # lop off leading 0x
        n = length(str)
        ret = 0
        for (i = 1; i <= n; i++) {
            c = substr(str, i, 1)
            c = tolower(c)
            # index() returns 0 if c not in string,
            # includes c == "0"
            k = index("123456789abcdef", c)
            ret = ret * 16 + k
        }
    } else if (str ~ \
  /^[-+]?([0-9]+([.][0-9]*)?|[.][0-9]+)([Ee][-+]?[0-9]+)?$/) {
        # decimal number, possibly floating point.  The exponent, with
        # its optional sign, may follow either mantissa form, so "1e3"
        # and "1.5e-3" are accepted as well as ".5e-3".
        ret = str + 0
    } else
        ret = "NOT-A-NUMBER"
    return ret
}
# BEGIN {     # gawk test harness
#     a[1] = "25"
#     a[2] = ".31"
#     a[3] = "0123"
#     a[4] = "0xdeadBEEF"
#     a[5] = "123.45"
#     a[6] = "1.e3"
#     a[7] = "1.32"
#     a[8] = "1.32E2"
#
#     for (i = 1; i in a; i++)
#         print a[i], strtonum(a[i]), mystrtonum(a[i])
# }

#include <assert.h>
int myfunc(int a, double b)
{
     assert(a <= 5 && b >= 17.1);
     …
}

prog.c:5: assertion failed: a <= 5 && b >= 17.1

# assert --- check that `condition' holds.  When it does not, report
#            `string' (the text of the failed test) on standard error
#            together with the current file name and record number,
#            then terminate the program with exit status 1.
function assert(condition, string)
{
    if (condition)
        return

    printf("%s:%d: assertion failed: %s\n",
        FILENAME, FNR, string) > "/dev/stderr"
    _assert_exit = 1    # remembered so the END rule exits as well
    exit 1
}

# exit still runs END rules; leave at once after a failed assertion
END {
    if (_assert_exit)
        exit 1
}

function myfunc(a, b)
{
     assert(a <= 5 && b >= 17.1, "a <= 5 && b >= 17.1")
     …
}

mydata:1357: assertion failed: a <= 5 && b >= 17.1

# round.awk --- do normal rounding
#
# Returns x rounded to the nearest integer; a fraction of exactly .5 is
# rounded away from zero (2.5 --> 3, -2.5 --> -3).
function round(x,   ival, aval, fraction)
{
   ival = int(x)    # int() truncates toward zero

   if (ival == x)   # already integral
      return ival

   if (x < 0) {
      # work with the magnitude, then step one further from zero
      aval = -x
      ival = int(aval)
      fraction = aval - ival
      return (fraction >= .5) ? int(x) - 1 : int(x)
   }

   fraction = x - ival
   return (fraction >= .5) ? ival + 1 : ival
}

# test harness
# { print $0, round($0) }

# cliff_rand.awk --- generate Cliff random numbers
#
# The generator state lives in the global _cliff_seed.
BEGIN { _cliff_seed = 0.1 }

# cliff_rand --- advance the generator and return the new value
function cliff_rand()
{
    # fractional part of 100 * log(seed), made non-negative
    _cliff_seed = (100 * log(_cliff_seed)) % 1
    _cliff_seed = (_cliff_seed < 0) ? - _cliff_seed : _cliff_seed
    return _cliff_seed
}

# ord.awk --- do ord and chr
# Global identifiers:
#    _ord_:        numerical values indexed by characters
#    _ord_init:    function to initialize _ord_
BEGIN    { _ord_init() }

# _ord_init --- fill _ord_ so that _ord_[character] is its numeric code
function _ord_init(    low, high, i, t)
{
    # start from the widest range, then narrow it when the character
    # set can be identified by where BEL (code 7) lives
    low = 0
    high = 255
    if (sprintf("%c", 7) == "\a")               # regular ascii
        high = 127
    else if (sprintf("%c", 128 + 7) == "\a")    # ascii, mark parity
        low = 128
    # otherwise: ebcdic(!), keep 0 - 255

    for (i = low; i <= high; i++)
        _ord_[sprintf("%c", i)] = i
}

# ord --- return the numeric code recorded in _ord_ for the first
#         character of str; any further characters are ignored
function ord(str,    c)
{
    return _ord_[substr(str, 1, 1)]
}
# chr --- return the character whose numeric code is c
function chr(c)
{
    # %c prints a number as the character with that code, but a string
    # as its first character; adding to 0 makes sure c is a number
    return sprintf("%c", 0 + c)
}
#### test code ####
# BEGIN {
#    for (;;) {
#        printf("enter a character: ")
#        if (getline var <= 0)
#            break
#        printf("ord(%s) = %d\n", var, ord(var))
#    }
# }

# join.awk --- join an array into a string
#
# Concatenates array[start] .. array[end], placing sep between adjacent
# elements.  An empty or omitted sep means a single space; passing
# SUBSEP as sep means "no separator at all".
function join(array, start, end, sep,    result, i)
{
    if (sep == "")
       sep = " "
    else if (sep == SUBSEP) # magic value
       sep = ""

    result = array[start]
    i = start
    while (++i <= end)
        result = result sep array[i]
    return result
}

# getlocaltime.awk --- get the time of day in a usable format
# Returns a string in the format of output of date(1)
# Populates the array argument time with individual values
# (ranges follow the strftime conversion that produces each value):
#    time["second"]       -- seconds (0 - 60; 60 only for a leap second)
#    time["minute"]       -- minutes (0 - 59)
#    time["hour"]         -- hours (0 - 23)
#    time["althour"]      -- hours on the 12-hour clock (1 - 12)
#    time["monthday"]     -- day of month (1 - 31)
#    time["month"]        -- month of year (1 - 12)
#    time["monthname"]    -- name of the month
#    time["shortmonth"]   -- short name of the month
#    time["year"]         -- year modulo 100 (0 - 99)
#    time["fullyear"]     -- full year
#    time["weekday"]      -- day of week (Sunday = 0)
#    time["altweekday"]   -- day of week (Monday = 1 ... Sunday = 7)
#    time["dayname"]      -- name of weekday
#    time["shortdayname"] -- short name of weekday
#    time["yearday"]      -- day of year (1 - 366)
#    time["timezone"]     -- abbreviation of timezone name
#    time["ampm"]         -- AM or PM designation
#    time["weeknum"]      -- week number, Sunday first day
#    time["altweeknum"]   -- week number, Monday first day
# Any previous contents of time are discarded.  The local i is declared
# but not used.
function getlocaltime(time,    ret, now, i)
{
    # get time once, avoids unnecessary system calls
    now = systime()
    # return date(1)-style output
    ret = strftime("%a %b %e %H:%M:%S %Z %Y", now)
    # clear out target array
    delete time
    # fill in values, force numeric values to be
    # numeric by adding 0
    time["second"]       = strftime("%S", now) + 0
    time["minute"]       = strftime("%M", now) + 0
    time["hour"]         = strftime("%H", now) + 0
    time["althour"]      = strftime("%I", now) + 0
    time["monthday"]     = strftime("%d", now) + 0
    time["month"]        = strftime("%m", now) + 0
    time["monthname"]    = strftime("%B", now)
    time["shortmonth"]   = strftime("%b", now)
    time["year"]         = strftime("%y", now) + 0
    time["fullyear"]     = strftime("%Y", now) + 0
    time["weekday"]      = strftime("%w", now) + 0
    time["altweekday"]   = strftime("%u", now) + 0
    time["dayname"]      = strftime("%A", now)
    time["shortdayname"] = strftime("%a", now)
    time["yearday"]      = strftime("%j", now) + 0
    time["timezone"]     = strftime("%Z", now)
    time["ampm"]         = strftime("%p", now)
    time["weeknum"]      = strftime("%U", now) + 0
    time["altweeknum"]   = strftime("%W", now) + 0
    return ret
}

# readfile1 --- read `file' record by record and return its contents as
#               one string, each record followed by the RT text that
#               terminated it.  Returns nothing if the first read fails.
function readfile1(file,    tmp, contents)
{
    if ((getline tmp < file) < 0)
        return

    # append the record just read, then keep reading until getline
    # stops returning a positive result
    do {
        contents = contents tmp RT
    } while ((getline tmp < file) > 0)

    close(file)
    return contents
}

# readfile.awk --- read an entire file at once
#
# Returns what a single getline from `file' delivers while RS is
# temporarily set to "^$"; the caller's RS is restored before returning.
# The result of getline is not checked, so if the read fails tmp is
# never assigned and the function returns an empty value.
function readfile(file,     tmp, save_rs)
{
    save_rs = RS        # remember the caller's record separator
    # NOTE(review): "^$" is a regexp RS that presumably cannot match
    # inside a non-empty file, making the whole file one record; this
    # relies on regexp RS support -- confirm for the awk in use
    RS = "^$"
    getline tmp < file
    close(file)         # a later call rereads the file from the start
    RS = save_rs
    return tmp
}

contents = readfile("/some/path")
if (length(contents) == 0)
    # file was empty …

#! /bin/sh
awkp='
   …
   '
input_program | awk "$awkp" | /bin/sh

$ flac-edit -song="Whoope! That's Great" file.flac

chmod +w file.flac
metaflac --remove-tag=TITLE file.flac
LANG=en_US.88591 metaflac --set-tag=TITLE='Whoope! That'"'"'s Great' file.flac
chmod -w file.flac

# shell_quote --- quote an argument for passing to the shell
#
# Wraps s in single quotes.  Each single quote inside s is emitted as a
# double-quoted single quote placed between two single-quoted pieces.
# The empty string is returned as a pair of double quotes.
function shell_quote(s,             # parameter
    SINGLE, QSINGLE, i, X, n, ret)  # locals
{
    if (s == "")
        return "\"\""

    SINGLE = "\x27"  # single quote
    QSINGLE = "\"\x27\""

    # split s at its single quotes and quote each piece separately,
    # joining consecutive pieces with the double-quoted single quote
    n = split(s, X, SINGLE)
    for (i = 1; i <= n; i++)
        ret = ret (i > 1 ? QSINGLE : "") SINGLE X[i] SINGLE

    return ret
}

# isnumeric --- check whether a value is numeric
#
# Returns 1 for numbers and for input that looks numeric (strnum).
# A plain string counts as numeric only when splitting it on
# whitespace yields exactly one piece and that piece is a strnum.
# Everything else yields 0.
function isnumeric(x,  f)
{
    if (typeof(x) == "number" || typeof(x) == "strnum")
        return 1
    if (typeof(x) != "string")
        return 0
    return (split(x, f, " ") == 1) && (typeof(f[1]) == "strnum")
}

# transfile.awk
#
# Give the user a hook for filename transitions
#
# The user must supply functions beginfile() and endfile()
# that each take the name of the file being started or
# finished, respectively.
#
# A transition is detected purely by comparing FILENAME with the name
# seen on the previous record, so naming the same file twice in a row
# on the command line is not noticed as a transition.

# Runs on the first record of each newly seen file name.
FILENAME != _oldfilename {
    if (_oldfilename != "")     # nothing to finish before the first file
        endfile(_oldfilename)
    _oldfilename = FILENAME
    beginfile(FILENAME)
}
# Finish the last file.  This runs unconditionally, so endfile() is also
# called when no record was ever read.
END { endfile(FILENAME) }

# ftrans.awk --- handle datafile transitions
#
# user supplies beginfile() and endfile() functions
#
# The first record of every file (FNR == 1) marks a transition, so the
# same file named twice in a row is still handled as two files.
FNR == 1 {
    if (_filename_ != "")       # nothing to finish before the first file
        endfile(_filename_)
    _filename_ = FILENAME
    beginfile(FILENAME)
}
# Finish the last file, but only if one was actually started: with no
# input records there was no beginfile() call to pair it with.
END {
    if (_filename_ != "")
        endfile(_filename_)
}

# rewind.awk --- rewind the current file and start over
#
# Re-queues the file currently being read as the next command-line
# argument and then abandons the current pass over it.
function rewind(    i)
{
    # open a gap after the current argument by moving every later
    # argument one slot up, working from the end
    i = ARGC
    while (i > ARGIND) {
        ARGV[i] = ARGV[i-1]
        i--
    }

    # one more argument to process
    ARGC++

    # the gap receives the name of the file being read right now
    ARGV[ARGIND+1] = FILENAME

    # stop reading this file; the re-queued copy comes next
    nextfile
}

$ cat data
-| a
-| b
-| c
-| d
-| e
$ cat test.awk
-| FNR == 3 && ! rewound {
-|    rewound = 1
-|    rewind()
-| }
-|
-| { print FILENAME, FNR, $0 }
$ gawk -f rewind.awk -f test.awk data 
-| data 1 a
-| data 2 b
-| data 1 a
-| data 2 b
-| data 3 c

-| data 4 d
-| data 5 e

# readable.awk --- library file that drops unreadable files from ARGV
BEGIN {
    i = 1
    while (i < ARGC) {
        # variable assignments and standard input are left alone
        if (ARGV[i] !~ /^[a-zA-Z_][a-zA-Z0-9_]*=.*/ \
            && ARGV[i] != "-" && ARGV[i] != "/dev/stdin") {
            if ((getline junk < ARGV[i]) < 0)
                delete ARGV[i]      # could not be read
            else
                close(ARGV[i])      # readable; release it again
        }
        i++
    }
}

# zerofile.awk --- library file that reports skipped input files
#
# Argind trails ARGIND.  Whenever ARGIND has advanced by more than
# one, the arguments in between produced no records, and the
# user-supplied zerofile(name, index) is called for each of them.
BEGIN {
    Argind = 0
}

ARGIND > Argind + 1 {
    Argind++
    while (Argind < ARGIND) {
        zerofile(ARGV[Argind], Argind)
        Argind++
    }
}

{
    if (Argind != ARGIND)
        Argind = ARGIND
}

END {
    if (ARGIND > Argind) {
        Argind++
        while (Argind <= ARGIND) {
            zerofile(ARGV[Argind], Argind)
            Argind++
        }
    }
}

# noassign.awk --- library file that makes command-line variable
# assignments inert, so that no special option is needed for it
#
# disable_assigns() prefixes "./" to every element of argv[1..argc-1]
# that has the form of a variable assignment.
function disable_assigns(argc, argv,    i)
{
    i = 1
    while (i < argc) {
        if (argv[i] ~ /^[a-zA-Z_][a-zA-Z0-9_]*=.*/)
            argv[i] = "./" argv[i]
        i++
    }
}

# Only active when the user sets No_command_assign (e.g., with -v).
BEGIN {
    if (No_command_assign)
        disable_assigns(ARGC, ARGV)
}

awk -v No_command_assign=1 -f noassign.awk -f yourprog.awk *

prog -a -b foo -c data1 data2 data3
prog -ac -bfoo -- data1 data2 data3
prog -acbfoo data1 data2 data3

/*
 * Skeleton showing how a C program walks its options with getopt().
 * The parts marked with an ellipsis are omitted from the listing.
 */
int
main(int argc, char *argv[])
{
    …
    /* print our own message */
    opterr = 0;
    /* fetch one option per iteration until getopt() returns -1 */
    while ((c = getopt(argc, argv, "v:f:F:W:")) != -1) {
        switch (c) {
        case 'f':    /* file */
            …
            break;
        case 'F':    /* field separator */
            …
            break;
        case 'v':    /* variable assignment */
            …
            break;
        case 'W':    /* extension */
            …
            break;
        /* '?' and anything unexpected both end up in usage() */
        case '?':
        default:
            usage();
            break;
        }
    }
    …
}

# getopt.awk --- Do C library getopt(3) function in awk
#                Also supports long options.
# External variables:
#    Optind -- index in ARGV of first nonoption argument
#    Optarg -- string value of argument to current option
#    Opterr -- if nonzero, print our own diagnostic
#    Optopt -- current option letter
# Returns:
#    -1     at end of options
#    "?"    for unrecognized option
#    <s>    a string representing the current option
# Private Data:
#    _opti  -- index in multiflag option, e.g., -abc

# getopt --- return the next option found in argv
#
# options is the string of short option letters and longopts the
# comma-separated list of long option names; in both, a trailing ":"
# marks an option that takes an argument.  Position is kept between
# calls in the globals Optind and _opti.  Optopt and Optarg are set
# for each option returned.
function getopt(argc, argv, options, longopts,    thisopt, i, j)
{
    # nothing can be an option if neither list was supplied
    if (length(options) == 0 && length(longopts) == 0)
        return -1                # no options given

    if (argv[Optind] == "--") {  # all done
        # step over the "--" so Optind is left on what follows it
        Optind++
        _opti = 0
        return -1

    } else if (argv[Optind] !~ /^-[^:[:space:]]/) {
        # not a "-" followed by something other than ":" or
        # whitespace: option processing stops, Optind stays put
        _opti = 0
        return -1
    }

    if (argv[Optind] !~ /^--/) {        # if this is a short option
        # _opti == 0 means a new argument is being started; its first
        # option letter is at position 2, right after the "-"
        if (_opti == 0)
            _opti = 2
        thisopt = substr(argv[Optind], _opti, 1)
        Optopt = thisopt
        i = index(options, thisopt)
        if (i == 0) {
            # the letter does not appear in options
            if (Opterr)
                printf("%c -- invalid option\n", thisopt) > "/dev/stderr"
            # move to the next letter, or to the next argument when
            # this was the last letter of the current one
            if (_opti >= length(argv[Optind])) {
                Optind++
                _opti = 0
            } else
                _opti++
            return "?"
        }

        # a ":" right after the letter in options: it takes an argument
        if (substr(options, i + 1, 1) == ":") {
            # get option argument
            # it is the rest of this argument if there is any,
            # otherwise the whole of the next argument
            if (length(substr(argv[Optind], _opti + 1)) > 0)
                Optarg = substr(argv[Optind], _opti + 1)
            else
                Optarg = argv[++Optind]
            # the current argument is used up in both cases
            _opti = 0
        } else
            Optarg = ""

        # advance to the next argument, or to the next letter of a
        # group of letters such as -abc
        if (_opti == 0 || _opti >= length(argv[Optind])) {
            Optind++
            _opti = 0
        } else
            _opti++
        return thisopt

    } else {
        # long option: the name starts at position 3 and runs up to
        # an "=" if there is one, otherwise to the end
        j = index(argv[Optind], "=")
        if (j > 0)
            thisopt = substr(argv[Optind], 3, j - 3)
        else
            thisopt = substr(argv[Optind], 3)
        Optopt = thisopt

        # the name must be a complete entry of longopts, delimited by
        # start/"," on the left and end/","/":" on the right
        i = match(longopts, "(^|,)" thisopt "($|[,:])")
        if (i == 0) {
            if (Opterr)
                 printf("%s -- invalid option\n", thisopt) > "/dev/stderr"
            Optind++
            return "?"
        }

        # last character of the matched text is ":" when the option
        # takes an argument: use the text after "=" if present,
        # otherwise the next argument
        if (substr(longopts, i-1+RLENGTH, 1) == ":") {
            if (j > 0)
                Optarg = substr(argv[Optind], j + 1)
            else
                Optarg = argv[++Optind]
        } else
            Optarg = ""

        Optind++
        return thisopt
    }
}

# Initialize getopt()'s external variables; run the built-in
# self test when _getopt_test is set (e.g., -v _getopt_test=1).
BEGIN {
    Optind = 1    # ARGV[0] is skipped
    Opterr = 1    # diagnostics are printed by default

    if (_getopt_test) {
        _mylongopts = "longa,longb:,otherc,otherd"
        _myshortopts = "ab:cd"
        # print every option getopt() hands back
        for (;;) {
            _go_c = getopt(ARGC, ARGV, _myshortopts, _mylongopts)
            if (_go_c == -1)
                break
            printf("c = <%s>, Optarg = <%s>\n", _go_c, Optarg)
        }
        # then whatever is left over
        printf("non-option arguments:\n")
        while (Optind < ARGC) {
            printf("\tARGV[%d] = <%s>\n", Optind, ARGV[Optind])
            Optind++
        }
    }
}

$ awk -f getopt.awk -v _getopt_test=1 -- -a -cbARG bax -x
-| c = <a>, Optarg = <>
-| c = <c>, Optarg = <>
-| c = <b>, Optarg = <ARG>
-| non-option arguments:
-|         ARGV[3] = <bax>
-|         ARGV[4] = <-x>
$ awk -f getopt.awk -v _getopt_test=1 -- -a -x -- xyz abc
-| c = <a>, Optarg = <>
error→ x -- invalid option
-| c = <?>, Optarg = <>
-| non-option arguments:
-|         ARGV[4] = <xyz>
-|         ARGV[5] = <abc>
$ awk -f getopt.awk -v _getopt_test=1 -- -a \
> --longa -b xx --longb=foo=bar --otherd --otherc arg1 arg2
-| c = <a>, Optarg = <>
-| c = <longa>, Optarg = <>
-| c = <b>, Optarg = <xx>
-| c = <longb>, Optarg = <foo=bar>
-| c = <otherd>, Optarg = <>
-| c = <otherc>, Optarg = <>
-| non-option arguments:
-|        ARGV[8] = <arg1>
-|        ARGV[9] = <arg2>

/*
 * pwcat.c
 *
 * Generate a printable version of the password database.
 */
#include <stdio.h>
#include <pwd.h>
/* Print every password-database entry as name:passwd:uid:gid:gecos:dir:shell. */
int
main(int argc, char **argv)
{
    struct passwd *entry;

    for (entry = getpwent(); entry != NULL; entry = getpwent()) {
        printf("%s:%s:%ld:%ld:%s:%s:%s\n",
            entry->pw_name, entry->pw_passwd,
            (long) entry->pw_uid, (long) entry->pw_gid,
            entry->pw_gecos, entry->pw_dir, entry->pw_shell);
    }
    endpwent();
    return 0;
}

$ pwcat
-| root:x:0:1:Operator:/:/bin/sh
-| nobody:*:65534:65534::/:
-| daemon:*:1:1::/:
-| sys:*:2:2::/:/bin/csh
-| bin:*:3:3::/bin:
-| arnold:xyzzy:2076:10:Arnold Robbins:/home/arnold:/bin/sh
-| miriam:yxaay:112:10:Miriam Robbins:/home/miriam:/bin/sh
-| andy:abcca2:113:10:Andy Jacobs:/home/andy:/bin/sh
…

# passwd.awk --- access password file information
BEGIN {
    # tailor this to suit your system
    # directory prefix of the pwcat helper; _pw_init() appends
    # "pwcat" to it, so the trailing slash is required
    _pw_awklib = "/usr/local/libexec/awk/"
}
# _pw_init --- load the password database once
#
# Runs the pwcat helper and indexes each output line by user name
# (_pw_byname), by uid (_pw_byuid) and by sequence (_pw_bycount).
# FS, RS, $0 and the field-splitting mode in effect are saved first
# and restored afterwards.
function _pw_init(    oldfs, oldrs, olddol0, pwcat, using_fw, using_fpat)
{
    if (! _pw_inited) {
        # record how fields are being split before FS is touched
        using_fpat = (PROCINFO["FS"] == "FPAT")
        using_fw = (PROCINFO["FS"] == "FIELDWIDTHS")
        olddol0 = $0
        oldrs = RS
        oldfs = FS

        RS = "\n"
        FS = ":"
        pwcat = _pw_awklib "pwcat"
        while ((pwcat | getline) > 0) {
            _pw_total++
            _pw_bycount[_pw_total] = $0
            _pw_byuid[$3] = $0
            _pw_byname[$1] = $0
        }
        close(pwcat)
        _pw_inited = 1
        _pw_count = 0

        # restore FS first, then re-select the splitting mode
        FS = oldfs
        if (using_fw)
            FIELDWIDTHS = FIELDWIDTHS
        else if (using_fpat)
            FPAT = FPAT
        RS = oldrs
        $0 = olddol0
    }
}

# getpwnam --- look up a password-database line by user name
#
# Returns the line stored under NAME in _pw_byname, or an empty
# value when there is none.
function getpwnam(name)
{
    _pw_init()    # does nothing after the first call
    return _pw_byname[name]
}

# getpwuid --- look up a password-database line by numeric user id
#
# Returns the line stored under UID in _pw_byuid, or an empty
# value when there is none.
function getpwuid(uid)
{
    _pw_init()    # does nothing after the first call
    return _pw_byuid[uid]
}

# getpwent --- return the next password-database line
#
# Lines come back in the order they were read; once all of them
# have been returned the result is "".
function getpwent()
{
    _pw_init()
    if (_pw_count >= _pw_total)
        return ""
    _pw_count++
    return _pw_bycount[_pw_count]
}

# endpwent --- make the next getpwent() start again at the first line
function endpwent()
{
    _pw_count = 0
}

/*
 * grcat.c
 *
 * Generate a printable version of the group database.
 */
#include <stdio.h>
#include <grp.h>
/* Print every group-database entry as name:passwd:gid:member,member,... */
int
main(int argc, char **argv)
{
    struct group *grp;
    char **member;

    for (grp = getgrent(); grp != NULL; grp = getgrent()) {
        printf("%s:%s:%ld:", grp->gr_name, grp->gr_passwd,
                             (long) grp->gr_gid);
        /* a comma goes in front of every member except the first */
        for (member = grp->gr_mem; *member != NULL; member++) {
            if (member != grp->gr_mem)
                putchar(',');
            printf("%s", *member);
        }
        putchar('\n');
    }
    endgrent();
    return 0;
}

$ grcat
-| wheel:*:0:arnold
-| nogroup:*:65534:
-| daemon:*:1:
-| kmem:*:2:
-| staff:*:10:arnold,miriam,andy
-| other:*:20:
…

# group.awk --- functions for dealing with the group file
BEGIN {
    # Change to suit your system
    # directory prefix of the grcat helper; _gr_init() appends
    # "grcat" to it, so the trailing slash is required
    _gr_awklib = "/usr/local/libexec/awk/"
}
# _gr_init --- load the group database once
#
# Runs the grcat helper and indexes each output line by group name
# (_gr_byname), by gid (_gr_bygid) and by sequence (_gr_bycount).
# When a name or gid is seen again, only the member list ($4) of the
# later line is appended, after a comma.  _gr_groupsbyuser maps each
# member to the space-separated names of the groups listing it.
# FS, RS, $0 and the field-splitting mode are saved and restored.
function _gr_init(    oldfs, oldrs, olddol0, grcat,
                             using_fw, using_fpat, n, a, i)
{
    if (! _gr_inited) {
        # record how fields are being split before FS is touched
        using_fpat = (PROCINFO["FS"] == "FPAT")
        using_fw = (PROCINFO["FS"] == "FIELDWIDTHS")
        olddol0 = $0
        oldrs = RS
        oldfs = FS

        RS = "\n"
        FS = ":"
        grcat = _gr_awklib "grcat"
        while ((grcat | getline) > 0) {
            if (! ($1 in _gr_byname))
                _gr_byname[$1] = $0
            else
                _gr_byname[$1] = _gr_byname[$1] "," $4

            if (! ($3 in _gr_bygid))
                _gr_bygid[$3] = $0
            else
                _gr_bygid[$3] = _gr_bygid[$3] "," $4

            # note this group under each of its members
            n = split($4, a, "[ \t]*,[ \t]*")
            i = 1
            while (i <= n) {
                if (! (a[i] in _gr_groupsbyuser))
                    _gr_groupsbyuser[a[i]] = $1
                else
                    _gr_groupsbyuser[a[i]] = _gr_groupsbyuser[a[i]] " " $1
                i++
            }

            _gr_count++
            _gr_bycount[_gr_count] = $0
        }
        close(grcat)
        _gr_count = 0
        _gr_inited++

        # restore FS first, then re-select the splitting mode
        FS = oldfs
        if (using_fw)
            FIELDWIDTHS = FIELDWIDTHS
        else if (using_fpat)
            FPAT = FPAT
        RS = oldrs
        $0 = olddol0
    }
}

tvpeople:*:101:johnny,jay,arsenio
tvpeople:*:101:david,conan,tom,joan

# getgrnam --- look up a group-database line by group name
#
# Returns the line stored under GROUP in _gr_byname, or an empty
# value when there is none.
function getgrnam(group)
{
    _gr_init()    # does nothing after the first call
    return _gr_byname[group]
}

# getgrgid --- look up a group-database line by numeric group id
#
# Returns the line stored under GID in _gr_bygid, or an empty
# value when there is none.
function getgrgid(gid)
{
    _gr_init()    # does nothing after the first call
    return _gr_bygid[gid]
}

# getgruser --- list the groups that name a user as a member
#
# Returns the space-separated group names recorded for USER in
# _gr_groupsbyuser, or an empty value when there are none.
function getgruser(user)
{
    _gr_init()    # does nothing after the first call
    return _gr_groupsbyuser[user]
}

# getgrent --- return the next group-database line
#
# Lines come back in the order they were read; "" is returned when
# the advanced position has no entry.
function getgrent()
{
    _gr_init()
    _gr_count++
    if (! (_gr_count in _gr_bycount))
        return ""
    return _gr_bycount[_gr_count]
}

# endgrent --- make the next getgrent() start again at the first line
function endgrent()
{
    _gr_count = 0
}

# walk_array --- print every scalar element of a possibly nested array
#
# NAME is the text printed in front of the subscripts; subarrays are
# visited recursively with their subscript appended to it.
function walk_array(arr, name,      i)
{
    for (i in arr) {
        if (! isarray(arr[i])) {
            printf("%s[%s] = %s\n", name, i, arr[i])
            continue
        }
        # a subarray: descend, extending the printed name
        walk_array(arr[i], name "[" i "]")
    }
}

# Demonstration: build an array mixing scalars and subarrays up to
# three levels deep, then print it with walk_array().
BEGIN {
    a[1] = 1
    a[2][1] = 21
    a[2][2] = 22
    a[3] = 3
    a[4][1][1] = 411
    a[4][2] = 42
    walk_array(a, "a")
}

$ gawk -f walk_array.awk
-| a[1] = 1
-| a[2][1] = 21
-| a[2][2] = 22
-| a[3] = 3
-| a[4][1][1] = 411
-| a[4][2] = 42

# process_array --- call a user function on the elements of an array
#
# PROCESS is the name of a function taking (name, element); it is
# called indirectly for every scalar element, however deeply nested.
# When DO_ARRAYS is true it is also called for each subarray before
# that subarray is descended into.
function process_array(arr, name, process, do_arrays,   i, new_name)
{
    for (i in arr) {
        new_name = name "[" i "]"
        if (! isarray(arr[i])) {
            @process(new_name, arr[i])
            continue
        }
        if (do_arrays)
            @process(new_name, arr[i])
        process_array(arr[i], new_name, process, do_arrays)
    }
}

# Demonstration: build a nested array and hand every scalar element
# to do_print() by name; the final 0 means subarrays themselves are
# not passed to the callback.
BEGIN {
    a[1] = 1
    a[2][1] = 21
    a[2][2] = 22
    a[3] = 3
    a[4][1][1] = 411
    a[4][2] = 42
    process_array(a, "a", "do_print", 0)
}
# do_print --- callback for process_array(): print "name = element"
function do_print(name, element)
{
    printf("%s = %s\n", name, element)
}

awk -f program -- options files

cut.awk -c1-8 myfiles > results

cut.awk -- -c1-8 myfiles > results

who | cut -c1-8 | sort | uniq

# cut.awk --- implement cut in awk
# Options:
#    -c list     Cut characters
#    -f list     Cut fields
#    -d c        Field delimiter character
#
#    -s          Suppress lines without the delimiter
#
# Requires getopt() and join() library functions

# usage --- print the cut synopsis on standard error, exit with 1
function usage()
{
    print "usage: cut [-f list] [-d c] [-s] [files...]" > "/dev/stderr"
    print "       cut [-c list] [files...]" > "/dev/stderr"
    exit 1
}

# Option processing for cut: decide between field and character
# mode, set the separators and build the selection list.
BEGIN {
    FS = "\t"    # tab is the default delimiter
    OFS = FS
    while ((c = getopt(ARGC, ARGV, "sf:c:d:")) != -1) {
        switch (c) {
        case "f":
            by_fields = 1
            fieldlist = Optarg
            break
        case "c":
            by_chars = 1
            fieldlist = Optarg
            OFS = ""
            break
        case "d":
            if (length(Optarg) > 1) {
                printf("cut: using first character of %s" \
                       " for delimiter\n", Optarg) > "/dev/stderr"
                Optarg = substr(Optarg, 1, 1)
            }
            FS = Optarg
            fs = FS
            OFS = FS
            # a lone space as FS has special meaning to awk;
            # use a bracket expression to get a literal space
            if (FS == " ")
                FS = "[ ]"
            break
        case "s":
            suppress = 1
            break
        default:
            usage()
        }
    }
    # blank out the options so they are not read as files
    i = 1
    while (i < Optind) {
        ARGV[i] = ""
        i++
    }

    # -f and -c exclude each other; -f is assumed if neither is given
    if (by_fields && by_chars)
        usage()
    if (! by_fields && ! by_chars)
        by_fields = 1

    if (fieldlist == "") {
        print "cut: needs list for -c or -f" > "/dev/stderr"
        exit 1
    }

    if (! by_fields)
        set_charlist()
    else
        set_fieldlist()
}

# set_fieldlist --- expand the -f list into individual field numbers
#
# Reads the global fieldlist (comma-separated numbers and low-high
# ranges) and fills flist[1..nfields] with one field number each.
# A malformed range is reported and the program exits with 1.
function set_fieldlist(        n, m, i, j, k, f, g)
{
    j = 0    # number of entries stored in flist so far
    n = split(fieldlist, f, ",")
    for (i = 1; i <= n; i++) {
        if (index(f[i], "-") == 0) {
            # a single field number
            flist[++j] = f[i]
            continue
        }
        # a range: exactly two parts, the first below the second
        m = split(f[i], g, "-")
        if (m != 2 || g[1] >= g[2]) {
            printf("cut: bad field list: %s\n",
                              f[i]) > "/dev/stderr"
            exit 1
        }
        k = g[1]
        while (k <= g[2]) {
            flist[++j] = k
            k++
        }
    }
    nfields = j
}

# set_charlist --- turn the -c list into a FIELDWIDTHS specification
#
# Reads the global fieldlist.  For each entry a width is appended
# to t (1 for a single position, the span for a low-high range),
# preceded by a filler width for any gap since the previous entry.
# flist[1..nfields] holds the positions in t of the wanted widths;
# FIELDWIDTHS is set from t via join().
function set_charlist(    field, i, j, f, g, n, m, t,
                          filler, last, len)
{
    field = 1   # next free slot in t
    j = 1       # next free slot in flist
    n = split(fieldlist, f, ",")
    for (i = 1; i <= n; i++) {
        if (index(f[i], "-") == 0) {
            # a single character position
            filler = (f[i] > 1) ? f[i] - last - 1 : 0
            if (filler)
                t[field++] = filler
            t[field++] = 1
            last = f[i]
            flist[j++] = field - 1
            continue
        }
        # a range of positions
        m = split(f[i], g, "-")
        if (m != 2 || g[1] >= g[2]) {
            printf("cut: bad character list: %s\n",
                           f[i]) > "/dev/stderr"
            exit 1
        }
        filler = (g[1] > 1) ? g[1] - last - 1 : 0
        if (filler)
            t[field++] = filler
        len = g[2] - g[1] + 1
        t[field++] = len
        last = g[2]
        flist[j++] = field - 1
    }
    FIELDWIDTHS = join(t, 1, field - 1)
    nfields = j - 1
}

# For every record print the selected fields, separated by OFS.
{
    # -s in field mode: drop records that lack the delimiter
    if (by_fields && suppress && index($0, fs) == 0)
        next
    for (i = 1; i <= nfields; i++) {
        if ($flist[i] == "")
            continue
        printf "%s", $flist[i]
        # a separator only between two nonempty selections
        if (i < nfields && $flist[i+1] != "")
            printf "%s", OFS
    }
    print ""
}

egrep [options] 'pattern' files …

# egrep.awk --- simulate egrep in awk
#
# Options:
#    -c    count of lines
#    -e    argument is pattern
#    -i    ignore case
#    -l    print filenames only
#    -n    add line number to output
#    -q    quiet - use exit value
#    -s    silent - don't print errors
#    -v    invert test, success if no match
#    -x    the entire line must match
#
# Requires getopt library function
# Uses IGNORECASE, BEGINFILE and ENDFILE
# Invoke using gawk -f egrep.awk -- options ...
# Option processing for egrep: set the flags, find the pattern and
# leave only the file operands in ARGV.
BEGIN {
    while ((c = getopt(ARGC, ARGV, "ce:ilnqsvx")) != -1) {
        switch (c) {
        case "c":
            count_only++
            break
        case "e":
            pattern = Optarg
            break
        case "i":
            IGNORECASE = 1
            break
        case "l":
            filenames_only++
            break
        case "n":
            line_numbers++
            break
        case "q":
            no_print++
            break
        case "s":
            no_errors++
            break
        case "v":
            invert++
            break
        case "x":
            full_line++
            break
        default:
            usage()
        }
    }

    # without -e, the first operand is the pattern
    if (pattern == "")
        pattern = ARGV[Optind++]
    if (pattern == "")
        usage()

    # blank out everything consumed so far
    i = 1
    while (i < Optind) {
        ARGV[i] = ""
        i++
    }

    if (Optind < ARGC) {
        # more than one file: results are labeled with file names
        if (ARGC - Optind > 1)
            do_filenames++
    } else {
        # no files at all: read standard input
        ARGV[1] = "-"
        ARGC = 2
    }
}

# Per-file setup for egrep.
BEGINFILE {
    fcount = 0    # matches counted in the file that is starting
    # with -s, quietly skip this file when ERRNO is set
    # NOTE(review): presumably ERRNO is nonempty here when the file
    # could not be opened -- confirm in the gawk BEGINFILE documentation
    if (ERRNO && no_errors)
        nextfile
}

# Per-file wrap-up for egrep: with -c (and without -q) print the
# number of matches in the file just finished, prefixed by the file
# name when several files were given, and add it to the grand total
# that determines the exit status.
ENDFILE {
    if (! no_print && count_only) {
        # FILENAME, not an unset variable, names the finished file
        if (do_filenames)
            print FILENAME ":" fcount
        else
            print fcount
    }

    total += fcount
}

# Test each record against the pattern, count it, and print it in
# the form selected by the options.
{
    # match() returns the position of the match, which can be
    # greater than 1; reduce it to a 1-or-0 flag so that the counts
    # below go up by one per matching line
    matches = (match($0, pattern) != 0)
    # -x: the match must cover the whole line
    if (matches && full_line && (RSTART != 1 || RLENGTH != length()))
         matches = 0
    if (invert)
        matches = ! matches
    fcount += matches    # 1 or 0
    if (! matches)
        next
    if (! count_only) {
        # -q: one match is all that is needed from this file
        if (no_print)
            nextfile
        # -l: print the name once and move on
        if (filenames_only) {
            print FILENAME
            nextfile
        }
        if (do_filenames) {
            if (line_numbers)
                print FILENAME ":" FNR ":" $0
            else
                print FILENAME ":" $0
        } else if (line_numbers)
            # -n also applies when only one file is searched
            print FNR ":" $0
        else
            print
    }
}

# Exit status is 0 when at least one line was counted in total,
# and 1 when none was.
END {
    exit (total == 0)
}

# usage --- print the egrep synopsis on standard error, exit with 1
function usage()
{
    print "Usage:\tegrep [-cilnqsvx] [-e pat] [files ...]" > "/dev/stderr"
    print "\tegrep [-cilnqsvx] pat [files ...]" > "/dev/stderr"
    exit 1
}

$ id
-| uid=1000(arnold) gid=1000(arnold) groups=1000(arnold),4(adm),7(lp),27(sudo)

# id.awk --- implement id in awk
#
# Requires user and group library functions and getopt
# output is:
# uid=12(foo) euid=34(bar) gid=3(baz) \
#             egid=5(blat) groups=9(nine),2(two),1(one)
# Options:
#   -G Output all group ids as space separated numbers (ruid, euid, groups)
#   -g Output only the egid as a number
#   -n Output name instead of the numeric value (with -g/-G/-u)
#   -r Output ruid/rgid instead of effective id
#   -u Output only effective user id, as a number

# usage --- print the id synopsis on standard error, exit with 1
function usage()
{
    printf("Usage:\n") > "/dev/stderr"
    printf("\tid [user]\n") > "/dev/stderr"
    printf("\tid -G [-n] [user]\n") > "/dev/stderr"
    printf("\tid -g [-nr] [user]\n") > "/dev/stderr"
    printf("\tid -u [-nr] [user]\n") > "/dev/stderr"
    exit 1
}

# Whole id program: parse the options, collect the ids of the
# current process or of the named user, then print what was asked.
BEGIN {
    # process the options
    while ((c = getopt(ARGC, ARGV, "Ggnru")) != -1) {
        switch (c) {
        case "G":
            groupset_only++
            break
        case "g":
            egid_only++
            break
        case "n":
            names_not_groups++
            break
        case "r":
            real_ids_only++
            break
        case "u":
            euid_only++
            break
        default:
            usage()
        }
    }

    # -G excludes -r, and at most one user may be named
    if ((groupset_only && real_ids_only) || ARGC - Optind > 1)
        usage()

    if (ARGC - Optind != 0) {
        # a user was named: take everything from the databases;
        # only real ids are available in that case
        fill_info_for_user(ARGV[ARGC-1])
        real_ids_only++
    } else {
        # no user named: describe the running process
        uid = PROCINFO["uid"]
        euid = PROCINFO["euid"]
        gid = PROCINFO["gid"]
        egid = PROCINFO["egid"]
        i = 1
        while (("group" i) in PROCINFO) {
            groupset[i] = PROCINFO["group" i]
            i++
        }
    }

    # -G: the group set only, separated by spaces
    if (groupset_only) {
        for (i = 1; i in groupset; i++) {
            if (names_not_groups) {
                entry = getgrgid(groupset[i])
                name = get_first_field(entry)
                printf("%s", name)
            } else
                printf("%u", groupset[i])
            if ((i + 1) in groupset)
                printf(" ")
        }
        print ""    # final newline
        exit 0
    }

    # -g: one group id, as a number or (with -n) as a name
    if (egid_only) {
        id = real_ids_only ? gid : egid
        if (! names_not_groups)
            printf("%u\n", id)
        else {
            entry = getgrgid(id)
            name = get_first_field(entry)
            printf("%s\n", name)
        }
        exit 0
    }

    # -u: one user id, as a number or (with -n) as a name
    if (euid_only) {
        id = real_ids_only ? uid : euid
        if (! names_not_groups)
            printf("%u\n", id)
        else {
            entry = getpwuid(id)
            name = get_first_field(entry)
            printf("%s\n", name)
        }
        exit 0
    }

    # default output: every id followed by its name in parentheses
    printf("uid=%d", uid)
    pw = getpwuid(uid)
    print_first_field(pw)

    # effective ids appear only when they differ from the real ones
    if (! real_ids_only && euid != uid) {
        printf(" euid=%d", euid)
        pw = getpwuid(euid)
        print_first_field(pw)
    }

    printf(" gid=%d", gid)
    pw = getgrgid(gid)
    print_first_field(pw)
    if (! real_ids_only && egid != gid) {
        printf(" egid=%d", egid)
        pw = getgrgid(egid)
        print_first_field(pw)
    }

    i = 1
    while (i in groupset) {
        if (i == 1)
            printf(" groups=")
        group = groupset[i]
        printf("%d", group)
        pw = getgrgid(group)
        print_first_field(pw)
        if ((i + 1) in groupset)
            printf(",")
        i++
    }
    print ""
}

# get_first_field --- return the text before the first colon
#
# For an empty STR nothing is returned (the result is empty).
function get_first_field(str,  a)
{
    if (str == "")
        return
    split(str, a, ":")
    return a[1]
}

# print_first_field --- print the first field of an entry as "(name)"
#
# STR is a colon-separated database line; its first field is written
# to standard output in parentheses, without a newline.  `first' is
# declared as a local so that this helper does not overwrite a
# global variable of the same name.
function print_first_field(str,    first)
{
    first = get_first_field(str)
    printf("(%s)", first)
}

# fill_info_for_user --- gather the ids of a named user
#
# Sets the globals uid and gid from the user's password entry and
# fills groupset[1..] with the gid of every group that lists the
# user as a member.  An unknown user is reported and the program
# exits with 1.
function fill_info_for_user(user,
                            pwent, fields, groupnames, grent, groups, i)
{
    pwent = getpwnam(user)
    if (pwent == "") {
        printf("id: '%s': no such user\n", user) > "/dev/stderr"
        exit 1
    }
    # fields 3 and 4 of the entry are the numeric ids
    split(pwent, fields, ":")
    gid = fields[4] + 0
    uid = fields[3] + 0

    # translate each group name into its gid
    groupnames = getgruser(user)
    split(groupnames, groups, " ")
    i = 1
    while (i in groups) {
        grent = getgrnam(groups[i])
        split(grent, fields, ":")
        groupset[i] = fields[3] + 0
        i++
    }
}

split [-l count] [-a suffix-len] [file [outname]]
split [-b N[k|m]] [-a suffix-len] [file [outname]]

# split.awk --- do split in awk
#
# Requires getopt() library function.
# usage --- print the split synopsis on standard error, exit with 1
function usage(     common)
{
    # the part shared by both forms of the command
    common = "[-a suffix-len] [file [outname]]"
    printf("%s\n", "usage: split [-l count]  " common) > "/dev/stderr"
    printf("%s\n", "       split [-b N[k|m]] " common) > "/dev/stderr"
    exit 1
}

# Set the defaults, apply the command line, and name the first
# output file.
BEGIN {
    # defaults; parse_arguments() may override them
    Outfile = "x"
    Byte_count = 0
    Line_count = 1000
    Suffix_length = 2

    parse_arguments()
    init_suffix_data()
    Output = Outfile compute_suffix()
}

# parse_arguments --- process the options and operands of split
#
# Sets Suffix_length (-a), Byte_count (-b, with an optional k or m
# multiplier) or Line_count (-l); -b and -l switch each other off.
# An optional first operand is the input file and an optional second
# one replaces the output prefix in Outfile; any further operand is
# a usage error.
function parse_arguments(   i, c, l, modifier)
{
    while ((c = getopt(ARGC, ARGV, "a:b:l:")) != -1) {
        if (c == "a")
            Suffix_length = Optarg + 0
        else if (c == "b") {
            Byte_count = Optarg + 0
            Line_count = 0
            # the last character may be a multiplier
            l = length(Optarg)
            modifier = substr(Optarg, l, 1)
            if (modifier == "k")
                Byte_count *= 1024
            else if (modifier == "m")
                Byte_count *= 1024 * 1024
        } else if (c == "l") {
            Line_count = Optarg + 0
            Byte_count = 0
        } else
            usage()
    }
    # Clear out options
    for (i = 1; i < Optind; i++)
        ARGV[i] = ""
    # Check for filename
    # (compare with "" explicitly: a bare truth test would treat an
    # operand such as "0" as absent, because it looks like the number 0)
    if (ARGV[Optind] != "") {
        Optind++
        # Check for different prefix
        if (ARGV[Optind] != "") {
            Outfile = ARGV[Optind]
            ARGV[Optind] = ""
            if (++Optind < ARGC)
                usage()
        }
    }
}

# compute_suffix --- return the current suffix and step to the next
#
# The suffix is built from the letters selected by Suffix_ind[],
# which is then advanced like an odometer.  Once the last possible
# suffix has been handed out, the following call reports an error
# and exits with 1.
function compute_suffix(    i, result, letters)
{
    # refuse to go past the final suffix
    if (Reached_last) {
        printf("split: too many files!\n") > "/dev/stderr"
        exit 1
    }
    if (on_last_file())
        Reached_last = 1    # this call hands out the final suffix

    # spell out the suffix for the current indices
    letters = "abcdefghijklmnopqrstuvwxyz"
    result = ""
    i = 1
    while (i <= Suffix_length) {
        result = result substr(letters, Suffix_ind[i], 1)
        i++
    }

    # advance the indices, rightmost first, carrying past 26
    for (i = Suffix_length; i >= 1; i--) {
        Suffix_ind[i]++
        if (Suffix_ind[i] <= 26)
            break
        Suffix_ind[i] = 1
    }
    return result
}

# init_suffix_data --- start every suffix position at the first letter
function init_suffix_data(  i)
{
    Reached_last = 0
    for (i = Suffix_length; i >= 1; i--)
        Suffix_ind[i] = 1
}

# on_last_file --- true (1) when every suffix position is at 26
function on_last_file(  i, on_last)
{
    for (i = 1; i <= Suffix_length; i++)
        if (Suffix_ind[i] != 26)
            return 0
    return 1
}

# Line mode: write each record to the current output file and move
# on to a new file after every Line_count records.
Line_count > 0 {
    tcount++
    if (tcount > Line_count) {
        # the current file is full; this record opens the next one
        close(Output)
        Output = Outfile compute_suffix()
        tcount = 1
    }
    print > Output
}

# Byte mode: tcount tracks how much has gone into the current file,
# counting each record as its length plus one for the newline.
Byte_count > 0 {
    if (tcount + length($0) + 1 > Byte_count) {
        # the record does not fit: its head fills up the current file
        leading_bytes = Byte_count - tcount
        printf("%s", substr($0, 1, leading_bytes)) > Output
        close(Output)

        # and its tail becomes the first record of the next file
        $0 = substr($0, leading_bytes + 1)
        Output = Outfile compute_suffix()
        tcount = 0
    }
    # the whole record, or what is left of it
    print > Output
    tcount += length($0) + 1
}

# Close the output file that is still open at end of input.
END {
    close(Output)
}

tee [-a] file …

# tee.awk --- tee in awk
#
# Copy standard input to all named output files.
# Append content if -a option is supplied.
#
# Collect the output file names in copy[], note a leading -a, and
# arrange for standard input to be the only input.
BEGIN {
    # every argument is a candidate output file
    i = 1
    while (i < ARGC) {
        copy[i] = ARGV[i]
        i++
    }
    # a leading -a is the append flag, not a file
    if (ARGV[1] == "-a") {
        delete copy[1]
        delete ARGV[1]
        ARGC--
        append = 1
    }
    if (ARGC < 2) {
        print "usage: tee [-a] file ..." > "/dev/stderr"
        exit 1
    }
    # from here on ARGV names standard input only
    ARGC = 2
    ARGV[1] = "-"
}

# Copy each input record to every file in copy[] and to standard
# output.
{
    # moving the if outside the loop makes it run faster
    # with -a the record is appended (>>), otherwise written (>)
    if (append)
        for (i in copy)
            print >> copy[i]
    else
        for (i in copy)
            print > copy[i]
    # finally, the copy for standard output
    print
}

for (i in copy)
    if (append)
        print >> copy[i]

    else
        print > copy[i]

# Close every output file named in copy[].
END {
    for (i in copy)
        close(copy[i])
}

uniq [-udc [-f n] [-s n]] [inputfile [outputfile]]

# uniq.awk --- do uniq in awk
#
# Requires getopt() and join() library functions

# usage --- print the uniq synopsis on standard error, exit with 1
function usage()
{
    print "Usage: uniq [-udc [-f fields] [-s chars]] [ in [ out ]]" > "/dev/stderr"
    exit 1
}
# -c    count lines. overrides -d and -u
# -d    only repeated lines
# -u    only nonrepeated lines
# -f n  skip n fields
# -s n  skip n characters, skip fields first

# As of 2020, '+' can be used as the option character in addition to '-'
# Previously allowed use of -N to skip fields and +N to skip
# characters is no longer allowed, and not supported by this version.
# Rewrite leading "+" options as "-" options so getopt() accepts them.
BEGIN {
    for (i = 1; i < ARGC; i++) {
        first = substr(ARGV[i], 1, 1)
        # "--" ends the options
        if (ARGV[i] == "--")
            break
        if (first == "+")
            ARGV[i] = "-" substr(ARGV[i], 2)
        else if (first != "-")
            break       # first operand: nothing more to convert
    }
}

# Option processing for uniq: set the flags and skip counts, and
# pick up the optional output file.
BEGIN {
    opts = "udcf:s:"
    outputfile = "/dev/stdout"
    count = 1
    while ((c = getopt(ARGC, ARGV, opts)) != -1) {
        switch (c) {
        case "u":
            non_repeated_only++
            break
        case "d":
            repeated_only++
            break
        case "c":
            do_count++
            break
        case "f":
            fcount = Optarg + 0
            break
        case "s":
            charcount = Optarg + 0
            break
        default:
            usage()
        }
    }
    # blank out the options so they are not read as files
    i = 1
    while (i < Optind) {
        ARGV[i] = ""
        i++
    }
    # neither -d nor -u: both kinds of lines are printed
    if (repeated_only == 0 && non_repeated_only == 0) {
        non_repeated_only = 1
        repeated_only = 1
    }
    # with two operands the second one receives the output
    if (ARGC - Optind == 2) {
        outputfile = ARGV[ARGC - 1]
        ARGV[ARGC - 1] = ""
    }
}

# are_equal --- compare the saved line with the current record
#
# Without -f and -s the two lines are compared as they are.
# Otherwise the first fcount fields are left out (the remaining
# fields are put back together with join()), then the first
# charcount characters are dropped, and what is left is compared.
function are_equal(    n, m, clast, cline, alast, aline)
{
    if (fcount == 0 && charcount == 0)
        return (last == $0)

    clast = last
    cline = $0
    if (fcount > 0) {
        n = split(last, alast)
        m = split($0, aline)
        clast = join(alast, fcount+1, n)
        cline = join(aline, fcount+1, m)
    }
    if (charcount) {
        clast = substr(clast, charcount + 1)
        cline = substr(cline, charcount + 1)
    }

    return (clast == cline)
}

# The first record only seeds the comparison.
NR == 1 {
    last = $0
    next
}

# Every later record either extends the current run of equal lines
# or ends it; the finished run is then reported as the options ask.
{
    equal = are_equal()
    if (equal)
        count++
    else {
        if (do_count)    # overrides -d and -u
            printf("%4d %s\n", count, last) > outputfile
        else if ((repeated_only && count > 1) ||
                 (non_repeated_only && count == 1))
            print last > outputfile
        last = $0
        count = 1    # a new run starts with this record
    }
    if (do_count)
        next
}
# Report the final run of lines, then close the output.
END {
    if (NR == 0) {
        # no input at all: there is no last line to report, so only
        # make sure the output file exists (and is empty)
        printf "" > outputfile
    } else if (do_count)
        printf("%4d %s\n", count, last) > outputfile
    else if ((repeated_only && count > 1) ||
            (non_repeated_only && count == 1))
        print last > outputfile
    close(outputfile)
}

wc [-lwcm] [files …]

# wc.awk --- count lines, words, characters, bytes
# Options:
#    -l    only count lines
#    -w    only count words
#    -c    only count bytes
#    -m    only count characters
#
# Default is to count lines, words, bytes
#
# Requires getopt() and file transition library functions
# Requires mbs extension from gawkextlib
@load "mbs"
# Option processing for wc: choose what to count and whether a
# total line is wanted.
BEGIN {
    # getopt() prints its own message about invalid options;
    # they are otherwise ignored here
    while ((c = getopt(ARGC, ARGV, "lwcm")) != -1) {
        switch (c) {
        case "l":
            do_lines = 1
            break
        case "w":
            do_words = 1
            break
        case "c":
            do_bytes = 1
            break
        case "m":
            do_chars = 1
            break
        }
    }
    # blank out the options so they are not read as files
    i = 1
    while (i < Optind) {
        ARGV[i] = ""
        i++
    }
    # nothing selected: count lines, words, and bytes
    if (! (do_lines || do_words || do_chars || do_bytes))
        do_lines = do_words = do_bytes = 1
    # a total is printed only when more than one argument remains
    print_total = (ARGC - i > 1)
}

# beginfile --- reset the per-file counts and remember the file name
function beginfile(file)
{
    fname = FILENAME
    bytes = 0
    chars = 0
    words = 0
    lines = 0
}

# endfile --- print the selected counts for the file just finished
#             and add all of them to the running totals
function endfile(file)
{
    # one tab-prefixed column per selected count, then the name
    if (do_lines)
        printf "\t%d", lines
    if (do_words)
        printf "\t%d", words
    if (do_chars)
        printf "\t%d", chars
    if (do_bytes)
        printf "\t%d", bytes
    printf "\t%s\n", fname

    # the totals are kept whether or not a column was selected
    tbytes += bytes
    tchars += chars
    twords += words
    tlines += lines
}

# do per line
# Add the current record to the per-file counts.
{
    chars += length($0) + 1    # get newline
    # mbs_length() is not defined in this file
    # NOTE(review): presumably it comes from the "mbs" extension and
    # returns the length in bytes -- confirm; + 1 again for the newline
    bytes += mbs_length($0) + 1
    lines++
    words += NF    # each field counts as one word
}

# When a total was requested, print the selected totals in the same
# column order as the per-file lines, labeled "total".
END {
    if (print_total) {
        if (do_lines)
            printf "\t%d", tlines
        if (do_words)
            printf "\t%d", twords
        if (do_chars)
            printf "\t%d", tchars
        if (do_bytes)
            printf "\t%d", tbytes
        print "\ttotal"
    }
}

# dupword.awk --- report words that occur twice in a row
#
# Case is ignored and everything except letters, digits and blanks
# counts as a separator.  The last word of a line is also compared
# with the first word of the next nonempty line.
{
    $0 = tolower($0)
    gsub(/[^[:alnum:][:blank:]]/, " ")
    $0 = $0         # split into fields again
    if (NF == 0)
        next
    # first word against the last word of the previous line
    if (prev == $1)
        printf("%s:%d: duplicate %s\n",
            FILENAME, FNR, $1)
    # every other word against the one before it
    i = 2
    while (i <= NF) {
        if ($(i-1) == $i)
            printf("%s:%d: duplicate %s\n",
                FILENAME, FNR, $i)
        i++
    }
    prev = $NF
}

# alarm.awk --- set an alarm
#
# Requires getlocaltime() library function
# usage: alarm time [ "message" [ count [ delay ] ] ]
# Whole alarm program: check the arguments, sleep until the given
# time, then print the message `count' times, `delay' seconds apart.
BEGIN {
    # Initial argument sanity checking
    usage1 = "usage: alarm time ['message' [count [delay]]]"
    usage2 = sprintf("\t(%s) time ::= hh:mm", ARGV[1])
    # the time has to look like hh:mm no matter how many of the
    # optional arguments follow it
    if (ARGC < 2 ||
        ARGV[1] !~ /[[:digit:]]?[[:digit:]]:[[:digit:]]{2}/) {
        print usage1 > "/dev/stderr"
        print usage2 > "/dev/stderr"
        exit 1
    }
    # pick up the optional arguments; each case also takes the
    # ones that come before it
    switch (ARGC) {
    case 5:
        delay = ARGV[4] + 0
        # fall through
    case 4:
        count = ARGV[3] + 0
        # fall through
    case 3:
        message = ARGV[2]
        break
    }
    # set defaults for once we reach the desired time
    if (delay == 0)
        delay = 180    # 3 minutes

    if (count == 0)
        count = 5

    # make sure the message rings the bell
    if (message == "")
        message = sprintf("\aIt is now %s!\a", ARGV[1])
    else if (index(message, "\a") == 0)
        message = "\a" message "\a"

    # split up alarm time
    split(ARGV[1], atime, ":")
    hour = atime[1] + 0    # force numeric
    minute = atime[2] + 0  # force numeric
    # get current broken down time
    getlocaltime(now)
    # if time given is 12-hour hours and it's after that
    # hour, e.g., `alarm 5:30' at 9 a.m. means 5:30 p.m.,
    # then add 12 to real hour
    if (hour < 12 && now["hour"] > hour)
        hour += 12
    # set target time in seconds since midnight
    target = (hour * 60 * 60) + (minute * 60)
    # get current time in seconds since midnight
    current = (now["hour"] * 60 * 60) + \
               (now["minute"] * 60) + now["second"]
    # how long to sleep for
    naptime = target - current
    if (naptime <= 0) {
        print "alarm: time is in the past!" > "/dev/stderr"
        exit 1
    }

    # zzzzzz..... go away if interrupted
    if (system(sprintf("sleep %d", naptime)) != 0)
        exit 1
    # time to notify!
    command = sprintf("sleep %d", delay)
    for (i = 1; i <= count; i++) {
        print message
        # if sleep command interrupted, go away
        if (system(command) != 0)
            break
    }
    exit 0
}

generate data | tr 'A-Z' 'a-z' | process data …

# translate.awk --- do tr-like stuff
# Bugs: does not handle things like tr A-Z a-z; it has
# to be spelled out. However, if `to' is shorter than `from',
# the last character in `to' is used for the rest of `from'.
# stranslate --- return a copy of `target' in which each character
# found in `from' is replaced by the character at the same position
# in `to'.  When `to' is the shorter string, its last character is
# used for all the remaining characters of `from'.
function stranslate(from, to, target,     lf, lt, ltarget, t_ar, i, c,
                                                               result)
{
    lf = length(from)
    lt = length(to)

    # positions that exist in `to' map one-to-one
    i = 1
    while (i <= lt) {
        t_ar[substr(from, i, 1)] = substr(to, i, 1)
        i++
    }
    # leftover `from' characters all map to the final `to' character
    while (i <= lf) {
        t_ar[substr(from, i, 1)] = substr(to, lt, 1)
        i++
    }

    # rebuild the target one character at a time
    ltarget = length(target)
    for (i = 1; i <= ltarget; i++) {
        c = substr(target, i, 1)
        result = result ((c in t_ar) ? t_ar[c] : c)
    }
    return result
}
# translate --- apply stranslate() to the current record in place
# and return the new record
function translate(from, to)
{
    $0 = stranslate(from, to, $0)
    return $0
}
# main program
# Pick up the `from' and `to' strings from the command line, then
# arrange for the data to be read from standard input.
BEGIN {

    if (ARGC < 3) {
        print "usage: translate from to" > "/dev/stderr"
        # a usage error must be visible to the caller as a failure
        exit 1
    }

    FROM = ARGV[1]
    TO = ARGV[2]
    # hide both operands from awk's file processing: keep a single
    # argument and make it "-", i.e., standard input
    ARGC = 2
    ARGV[1] = "-"
}
# every record: translate it in place and print the result
{
    print translate(FROM, TO)
}

line 1          line 6
line 2          line 7
line 3          line 8
line 4          line 9
line 5          line 10
…

# labels.awk --- print mailing labels
# Each label is 5 lines of data that may have blank lines.
# The label sheets have 2 blank lines at the top and 2 at
# the bottom.
BEGIN {
    RS = ""           # records are separated by blank lines
    MAXLINES = 100    # highest line number printpage() will print
}
# printpage --- print the saved lines as one sheet of labels, two
# labels per row, then forget them.  Does nothing when no lines
# have been saved.
function printpage(    i, j)
{
    if (Nlines <= 0)
        return
    printf "\n\n"        # header
    # each pass prints one row: lines i..i+4 on the left,
    # lines i+5..i+9 on the right
    i = 1
    while (i <= Nlines) {
        if (i == 21 || i == 61)
            print ""
        for (j = 0; j < 5 && i + j <= MAXLINES; j++)
            printf "   %-41s %s\n", line[i+j], line[i+j+5]
        print ""
        i += 10
    }
    printf "\n\n"        # footer
    delete line
}
# main rule
# One record is one label.  Print the page once 20 labels have been
# collected, then store this label's lines, padded to 5 lines.
{
    if (Count >= 20) {
        printpage()
        Count = 0
        Nlines = 0
    }
    n = split($0, a, "\n")
    i = 1
    while (i <= n) {
        line[++Nlines] = a[i]
        i++
    }
    # pad a short label with empty lines
    while (i <= 5) {
        line[++Nlines] = ""
        i++
    }
    Count++
}
# at end of input, print whatever labels are still saved
END {
    printpage()
}

# wordfreq-first-try.awk --- print list of word frequencies
# count each field of every record as one occurrence of a word
{
    i = 1
    while (i <= NF) {
        freq[$i]++
        i++
    }
}

# print one "word<TAB>count" line per distinct word
END {
    for (word in freq)
        printf("%s\t%d\n", word, freq[word])
}

# wordfreq.awk --- print list of word frequencies
{
    # fold to lowercase so that case differences do not matter
    $0 = tolower($0)
    # delete everything but letters, digits, underscores, and blanks
    gsub(/[^[:alnum:]_[:blank:]]/, "")
    i = 1
    while (i <= NF) {
        freq[$i]++
        i++
    }
}
# print one "word<TAB>count" line per distinct word
END {
    for (word in freq)
        printf("%s\t%d\n", word, freq[word])
}

awk -f wordfreq.awk file1 | sort -k 2nr

# send the word list through sort(1) instead of printing it directly
END {
    sort = "sort -k 2nr"
    for (word in freq)
        printf("%s\t%d\n", word, freq[word]) | sort
    # closing the pipe lets sort finish and write its output
    close(sort)
}

# histsort.awk --- compact a shell history file
# Thanks to Byron Rakitzis for the general idea

# remember each distinct line in order of first appearance, and
# count in data[] how many times it was seen
{
    if (! ($0 in data))
        lines[++count] = $0
    data[$0]++
}

# print the distinct lines in their original order
END {
    i = 1
    while (i <= count) {
        print lines[i]
        i++
    }
}

print data[lines[i]], lines[i]

awk '{ if (! seen[$0]++) print }'

awk '! seen[$0]++'

…
This program has a @code{BEGIN} rule
that prints a nice message:
@example
@c file examples/messages.awk
BEGIN @{ print "Don't panic!" @}
@c endfile
@end example
It also prints some final advice:
@example
@c file examples/messages.awk
END @{ print "Always avoid bored archaeologists!" @}
@c endfile
@end example
…

# extract.awk --- extract files and run programs from Texinfo files
# match the Texinfo control lines without regard to case
BEGIN {
    IGNORECASE = 1
}
# `@c system COMMAND' --- run COMMAND, warn if it fails
/^@c(omment)?[ \t]+system/ {
    # common prefix for both diagnostics below
    e = ("extract: " FILENAME ":" FNR)
    if (NF < 3) {
        e = (e  ": badly formed `system' line")
        print e > "/dev/stderr"
        next
    }
    # blank out the first two fields; what is left is the command
    $2 = ""
    $1 = ""
    stat = system($0)
    if (stat != 0) {
        e = (e ": warning: system returned " stat)
        print e > "/dev/stderr"
    }
}

# `@c file NAME' --- copy the following lines into NAME, up to the
# matching `@c endfile' line.  `@group' / `@end group' lines and
# Texinfo comment lines are dropped, a doubled `@@' becomes a single
# `@', and a single `@' is removed.
/^@c(omment)?[ \t]+file/ {
    if (NF != 3) {
        e = ("extract: " FILENAME ":" FNR ": badly formed `file' line")
        print e > "/dev/stderr"
        next
    }
    if ($3 != curfile) {
        if (curfile != "")
            filelist[curfile] = 1   # save to close later
        curfile = $3
    }
    for (;;) {
        if ((getline line) <= 0)
            unexpected_eof()
        if (line ~ /^@c(omment)?[ \t]+endfile/)
            break
        else if (line ~ /^@(end[ \t]+)?group/)
            continue
        # comment line: same `@c' / `@comment' spelling as the
        # other patterns in this program
        else if (line ~ /^@c(omment)?[ \t]+/)
            continue
        # fast path: nothing to unescape
        if (index(line, "@") == 0) {
            print line > curfile
            continue
        }
        n = split(line, a, "@")
        # if a[1] == "", means leading @,
        # don't add one back in.
        for (i = 2; i <= n; i++) {
            if (a[i] == "") { # was an @@
                a[i] = "@"
                # the empty piece that follows belongs to the same
                # `@@' pair, so step over it
                if (a[i+1] == "")
                    i++
            }
        }

        # NOTE(review): join() is defined outside this program;
        # SUBSEP is presumably its "no separator" marker --- confirm
        print join(a, 1, n, SUBSEP) > curfile
    }
}

# close every output file that was written: the current one first,
# then the earlier ones that the `file' rule recorded in filelist
END {
    close(curfile)          # close the last one
    for (f in filelist)     # close all the rest
        close(f)
}

# unexpected_eof --- report that the input ended (or could not be
# read) in the middle of a file section, then exit with status 1
function unexpected_eof()
{
    printf "extract: %s:%d: unexpected EOF or error\n",
           FILENAME, FNR > "/dev/stderr"
    exit 1
}

command1 < orig.data | sed 's/old/new/g' | command2 > result

# awksed.awk --- do s/foo/bar/g using just print
#    Thanks to Michael Brennan for the idea
# usage --- print a usage message on standard error and exit with
# status 1
function usage()
{
    print("usage: awksed pat repl [files...]") > "/dev/stderr"
    exit 1
}

BEGIN {
    # both the pattern and the replacement are required
    if (ARGC < 3)
        usage()

    # the pattern becomes the record separator and the replacement
    # becomes the output record separator
    RS = ARGV[1]
    ORS = ARGV[2]
    # blank out both operands so that they are not opened as files
    ARGV[2] = ""
    ARGV[1] = ""
}

# look ma, no hands!
# Print each record followed by ORS, except when RT is empty (the
# record was not ended by the separator); then print it bare.
{
    printf "%s%s", $0, (RT == "" ? "" : ORS)
}

# library functions
@include getopt.awk
@include join.awk
…
# main program
BEGIN {
    while ((c = getopt(ARGC, ARGV, "a:b:cde")) != -1)
        …
    …
}

#! /bin/sh
# igawk --- like gawk but do @include processing
#
# This first part collects the awk program from the -f, --file and
# --source options (or, failing those, from the first non-option
# argument) into `program', saves the other options in `opts', and
# leaves the remaining arguments in "$@".
#
# A leading `debug' argument turns on shell tracing
if [ "$1" = debug ]
then
    set -x
    shift
fi
# A literal newline, so that program text is formatted correctly
n='
'
# Initialize variables to empty
program=
opts=
while [ $# -ne 0 ] # loop over arguments
do
    case $1 in
    # explicit end of options
    --)     shift
            break ;;
    # `-W opt' is rewritten as `-Wopt' and examined again;
    # `continue' skips the shift at the bottom of the loop
    -W)     shift
            # The ${x?'message here'} construct prints a
            # diagnostic (and a non-interactive shell exits)
            # if $x is unset
            set -- -W"${@?'missing operand'}"
            continue ;;
    # options for gawk itself are saved in `opts', wrapped in single
    # quotes for the `eval' at the end of the script.
    # NOTE(review): an operand that itself contains a single quote
    # would end that quoting early
    -[vF])  opts="$opts $1 '${2?'missing operand'}'"
            shift ;;
    -[vF]*) opts="$opts '$1'" ;;
    # each program file becomes an `@include' line in `program'
    -f)     program="$program$n@include ${2?'missing operand'}"
            shift ;;
    -f*)    f=$(expr "$1" : '-f\(.*\)')
            program="$program$n@include $f" ;;
    -[W-]file=*)
            f=$(expr "$1" : '-.file=\(.*\)')
            program="$program$n@include $f" ;;
    -[W-]file)
            program="$program$n@include ${2?'missing operand'}"
            shift ;;
    # source text is appended to `program' as is
    -[W-]source=*)
            t=$(expr "$1" : '-.source=\(.*\)')
            program="$program$n$t" ;;
    -[W-]source)
            program="$program$n${2?'missing operand'}"
            shift ;;
    -[W-]version)
            echo igawk: version 3.0 1>&2
            gawk --version
            exit 0 ;;
    # any other -W or long option is passed through to gawk
    -[W-]*) opts="$opts '$1'" ;;
    # first non-option argument: stop parsing
    *)      break ;;
    esac
    shift
done
# no program from the options: the next argument is the program text
if [ -z "$program" ]
then
     program=${1?'missing program'}
     shift
fi
# At this point, `program' has the program.

# expand_prog holds the awk program that does the @include expansion.
# It reads the file named by its first argument and prints each line,
# except that a line whose first field is `@include' (in any case) is
# replaced by the contents of the file it names, looked up by pathto()
# in the directories listed in AWKPATH.  Input files are kept on a
# stack, so an included file may include others.  A file that cannot be
# found, or that was already included, is reported on /dev/stderr.
expand_prog='
function pathto(file,    i, t, junk)
{
    if (index(file, "/") != 0)
        return file
    if (file == "-")
        return file
    for (i = 1; i <= ndirs; i++) {
        t = (pathlist[i] "/" file)

        if ((getline junk < t) > 0) {
            # found it
            close(t)
            return t
        }

    }
    return ""
}

BEGIN {
    path = ENVIRON["AWKPATH"]
    ndirs = split(path, pathlist, ":")
    for (i = 1; i <= ndirs; i++) {
        if (pathlist[i] == "")
            pathlist[i] = "."
    }

    stackptr = 0
    input[stackptr] = ARGV[1] # ARGV[1] is first file
    for (; stackptr >= 0; stackptr--) {
        while ((getline < input[stackptr]) > 0) {
            if (tolower($1) != "@include") {
                print
                continue
            }
            fpath = pathto($2)
            if (fpath == "") {
                printf("igawk: %s:%d: cannot find %s\n",
                    input[stackptr], FNR, $2) > "/dev/stderr"
                continue
            }
            if (! (fpath in processed)) {
                processed[fpath] = input[stackptr]
                input[++stackptr] = fpath  # push onto stack
            } else
                print $2, "included in", input[stackptr],
                    "already included in",
                    processed[fpath] > "/dev/stderr"
        }
        close(input[stackptr])
    }
}'  # close quote ends `expand_prog' variable
# Run the expander: the collected program text is fed to it through a
# here document, and its output (the program with every @include
# expanded) is captured in `processed_program'.
processed_program=$(gawk -- "$expand_prog" /dev/stdin << EOF
$program
EOF
)

# `eval' makes the shell parse the quotes stored inside $opts; the
# single-quoted '"$processed_program"' and '"$@"' are expanded only
# during that second parse, so each stays intact as its own argument(s).
eval gawk $opts -- '"$processed_program"' '"$@"'

# anagram.awk --- An implementation of the anagram-finding algorithm
#                 from Jon Bentley's "Programming Pearls," 2nd edition.
#                 Addison Wesley, 2000, ISBN 0-201-65788-0.
#                 Column 2, Problem C, section 2.8, pp 18-20.
# Skip possessives
$0 ~ /'s$/ {
    next
}

# File each word under its signature (its letters in sorted order)
{
    key = word2key($1)
    data[key][$1] = $1
}

# word2key --- split word apart into letters, sort, and join back together
# word2key --- return the characters of `word' in sorted order,
# joined back into a single string
function word2key(word,     a, i, n, result)
{
    n = split(word, a, "")    # one character per element
    asort(a)
    i = 1
    while (i <= n) {
        result = result a[i]
        i++
    }
    return result
}

# Print every group of two or more words that share a signature, one
# group per line, passing the lines through sort(1).
END {
    sort = "sort"
    for (key in data) {
        # Sort words with same key
        nwords = asorti(data[key], words)
        if (nwords != 1) {
            # Minor glitch: trailing space at end of each line
            for (j = 1; j <= nwords; j++)
                printf("%s ", words[j]) | sort
            print "" | sort
        }
    }
    close(sort)
}

$ gawk -f anagram.awk /usr/share/dict/words | grep '^b'
…
babbled blabbed
babbler blabber brabble
babblers blabbers brabbles
babbling blabbing
babbly blabby
babel bable
babels beslab
babery yabber
…

awk 'BEGIN{O="~"~"~";o="=="=="==";o+=+o;x=O""O;while(X++<=x+o+o)c=c"%c";
printf c,(x-O)*(x-O),x*(x-o)-o,x*(x-O)+x-O-o,+x*(x-O)-x+o,X*(o*o+O)+x-O,
X*(X-x)-o*o,(x+X)*o*o+o,x*(X-x)-O-O,x-O+(O+o+X+x)*(o+O),X*X-X*(x-O)-x+O,
O+X*(o*(o+O)+O),+x+O+X*o,x*(x-o),(o+X+x)*o*o-(x-O-O),O+(X-x)*(X+O),x-O}'

# Take the pattern and the replacement from the first two arguments,
# then blank those arguments so they are not treated as files.
BEGIN {
    pat = ARGV[1]
    repl = ARGV[2]
    ARGV[2] = ""
    ARGV[1] = ""
}
# replace every match in the record, then print the record
{
    gsub(pat, repl, $0)
    print $0
}

$ echo 0123 123 0x123 |
> gawk --non-decimal-data '{ printf "%d, %d, %d\n", $1, $2, $3 }'
-| 83, 123, 291

$ echo 0123 123 0x123 | gawk '{ print $1, $2, $3 }'
-| 0123 123 0x123

$ echo 0123 123 0x123 | gawk --non-decimal-data '
> { print $1, $2, $3
>   print $1 + 0, $2 + 0, $3 + 0 }'
-| 0123 123 0x123
-| 83 123 291

function comp_func(i1, v1, i2, v2)
{
    compare elements 1 and 2 in some fashion
    return < 0; 0; or > 0
}

# cmp_num_idx --- order two elements by index, treated as numbers,
# smallest first; the values are not looked at
function cmp_num_idx(i1, v1, i2, v2)
{
     return i1 - i2
}

# cmp_str_val --- order two elements by value, compared as strings,
# smallest first; returns -1, 0, or 1
function cmp_str_val(i1, v1, i2, v2)
{
    # concatenating "" forces a string comparison below
    v1 = v1 ""
    v2 = v2 ""
    if (v1 == v2)
        return 0
    return (v1 < v2) ? -1 : 1
}

# cmp_num_str_val --- order two elements by value so that numeric
# values come before string values; numbers are ordered numerically
# and strings as strings, smallest first
function cmp_num_str_val(i1, v1, i2, v2,   n1, n2)
{
     n1 = v1 + 0
     n2 = v2 + 0
     if (n1 != v1) {
         # v1 is not numeric
         if (n2 == v2)
             return 1        # ... but v2 is, so v2 goes first
         if (v1 < v2)
             return -1
         return (v1 != v2)
     }
     # v1 is numeric
     if (n2 == v2)
         return (n1 - n2)
     return -1               # number before string
}

# Traverse the same array once per comparison function, selecting
# the function through PROCINFO["sorted_in"].
BEGIN {
    data["one"] = 10
    data["two"] = 20
    data[10] = "one"
    data[100] = 100
    data[20] = "two"
    nfuncs = split("cmp_num_idx cmp_str_val cmp_num_str_val", f, " ")
    for (i = 1; i <= nfuncs; i++) {
        printf("Sort function: %s\n", f[i])
        PROCINFO["sorted_in"] = f[i]
        for (j in data)
            printf("\tdata[%s] = %s\n", j, data[j])
        print ""
    }
}

$ gawk -f compdemo.awk
-| Sort function: cmp_num_idx      Sort by numeric index
-|     data[two] = 20
-|     data[one] = 10              Both strings are numerically zero
-|     data[10] = one
-|     data[20] = two
-|     data[100] = 100
-|
-| Sort function: cmp_str_val      Sort by element values as strings
-|     data[one] = 10
-|     data[100] = 100             String 100 is less than string 20
-|     data[two] = 20
-|     data[10] = one
-|     data[20] = two
-|
-| Sort function: cmp_num_str_val  Sort all numeric values before all strings
-|     data[one] = 10
-|     data[two] = 20
-|     data[100] = 100
-|     data[10] = one
-|     data[20] = two

# passwd-sort.awk --- simple program to sort by field position
# field position is specified by the global variable POS
# cmp_field --- order two records (subarrays v1 and v2) by the field
# at position POS, compared ascending; returns -1, 0, or 1
function cmp_field(i1, v1, i2, v2)
{
    if (v1[POS] < v2[POS])
        return -1
    return (v1[POS] != v2[POS])
}
# save every field of every record: a[record number][field number]
{
    i = 1
    while (i <= NF) {
        a[NR][i] = $i
        i++
    }
}

# print the saved records in the order given by cmp_field(), with
# the fields rejoined by colons
END {
    PROCINFO["sorted_in"] = "cmp_field"

    # an out-of-range POS falls back to the first field
    if (POS < 1 || POS > NF)
        POS = 1
    for (i in a) {
        rec = ""
        for (j = 1; j <= NF; j++)
            rec = rec a[i][j] (j < NF ? ":" : "")
        print rec
    }
}

$ gawk -v POS=1 -F: -f passwd-sort.awk /etc/passwd
-| adm:x:3:4:adm:/var/adm:/sbin/nologin
-| apache:x:48:48:Apache:/var/www:/sbin/nologin
-| avahi:x:70:70:Avahi daemon:/:/sbin/nologin
…

# cmp_randomize --- ignore the elements and return a random result
function cmp_randomize(i1, v1, i2, v2)
{
    # caution: with a comparison like this the sort may never
    # terminate!
    return 2 - (4 * rand())
}

# cmp_numeric --- order two elements by numeric value, largest
# first; equal values are ordered by index, largest first
function cmp_numeric(i1, v1, i2, v2)
{
    if (v1 != v2)
        return (v2 - v1)
    return (i2 - i1)
}

# cmp_string --- order two elements by the string formed from the
# value followed by the index, largest first; returns -1, 0, or 1
function cmp_string(i1, v1, i2, v2)
{
    v1 = v1 i1
    v2 = v2 i2
    if (v1 > v2)
        return -1
    return (v1 != v2)
}

populate the array data
n = asort(data)
for (i = 1; i <= n; i++)
    do something with data[i]

populate the array source
n = asort(source, dest)
for (i = 1; i <= n; i++)
    do something with dest[i]

{ source[$0] = some_func($0) }
END {
    n = asorti(source, dest)
    for (i = 1; i <= n; i++) {
        Work with sorted indices directly:
        do something with dest[i]
        …
        Access original array via sorted indices:
        do something with source[dest[i]]
    }
}

# case_fold_compare --- compare as strings, ignoring case
# case_fold_compare --- compare two values as lowercased strings;
# returns -1, 0, or 1
function case_fold_compare(i1, v1, i2, v2,    l, r)
{
    l = tolower(v1)
    r = tolower(v2)
    return (l < r) ? -1 : (l != r)
}

# Test program
# Sort the 52 letters with case_fold_compare() and print the result
# on two lines, half of the letters on each.
BEGIN {
    Letters = "abcdefghijklmnopqrstuvwxyz" \
              "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    split(Letters, data, "")
    asort(data, result, "case_fold_compare")
    j = length(result)
    for (i = 1; i <= j; i++)
        printf("%s%s", result[i], (i % (j/2) == 0) ? "\n" : " ")
}

$ gawk -f case_fold_compare.awk
-| A a B b c C D d e E F f g G H h i I J j k K l L M m
-| n N O o p P Q q r R S s t T u U V v w W X x y Y z Z

# Write the data for processing
tempfile = ("mydata." PROCINFO["pid"])
while (not done with data)
    print data | ("subprogram > " tempfile)
close("subprogram > " tempfile)
# Read the results, remove tempfile when done
while ((getline newdata < tempfile) > 0)
    process newdata appropriately
close(tempfile)
system("rm " tempfile)

do {
    print data |& "subprogram"
    "subprogram" |& getline results
} while (data left to process)
close("subprogram")

# Send the alphabet, backwards, to a sort coprocess and read the
# sorted letters back.
BEGIN {
    command = "LC_ALL=C sort"
    n = split("abcdefghijklmnopqrstuvwxyz", a, "")
    i = n
    while (i > 0) {
        print a[i] |& command
        i--
    }
    # close only our sending end, so that sort sees end of input
    close(command, "to")
    while ((command |& getline line) > 0)
        print "got", line
    close(command)
}

command = "sort -nr"           # command, save in convenience variable
PROCINFO[command, "pty"] = 1   # update PROCINFO
print … |& command           # start two-way pipe
…

$ cat add.c
#include <stdio.h>

/* Read pairs of integers from standard input and print the sum of
   each pair on its own line; stop at the first incomplete pair. */
int
main(void)
{
    int x, y;

    for (;;) {
        if (scanf("%d %d", & x, & y) != 2)
            break;
        printf("%d\n", x + y);
    }
    return 0;
}
$ cc -O add.c -o add      Compile the program

$ echo 1 2 |
> gawk -v cmd=./add '{ print |& cmd; cmd |& getline x; print x }'

$ echo 1 2 |
> gawk -v cmd=./add 'BEGIN { PROCINFO[cmd, "pty"] = 1 }
>                  { print |& cmd; cmd |& getline x; print x }'
-| 3

# Read one line from the `daytime' service on localhost and print it.
BEGIN {
    # NOTE(review): gawk network file names have the form
    # /inet/PROTOCOL/LOCAL-PORT/REMOTE-HOST/REMOTE-PORT --- confirm
    Service = "/inet/tcp/0/localhost/daytime"
    Service |& getline      # no variable given: the line goes into $0
    print $0
    close(Service)
}

gawk --profile=myprog.prof -f myprog.awk data1 data2

BEGIN { print "First BEGIN rule" }
END { print "First END rule" }
/foo/ {
    print "matched /foo/, gosh"
    for (i = 1; i <= 3; i++)
        sing()
}
{
    if (/foo/)
        print "if is true"
    else
        print "else is true"
}
BEGIN { print "Second BEGIN rule" }
END { print "Second END rule" }
function sing(    dummy)
{
    print "I gotta be me!"
}

foo
bar
baz
foo
junk

    # gawk profile, created Mon Sep 29 05:16:21 2014
    # BEGIN rule(s)
    BEGIN {
 1          print "First BEGIN rule"
    }
    BEGIN {
 1          print "Second BEGIN rule"
    }
    # Rule(s)
 5  /foo/ { # 2
 2          print "matched /foo/, gosh"
 6          for (i = 1; i <= 3; i++) {
 6                  sing()
            }
    }
 5  {
 5          if (/foo/) { # 2
 2                  print "if is true"
 3          } else {
 3                  print "else is true"
            }
    }
    # END rule(s)
    END {
 1          print "First END rule"
    }
    END {
 1          print "Second END rule"
    }
    # Functions, listed alphabetically
 6  function sing(dummy)
    {
 6          print "I gotta be me!"
    }

/foo/

/foo/   {
    print
}

$ gawk --profile -f myprog &
[1] 13992

$ kill -USR1 13992

# Function Call Stack:
#   3. baz
#   2. bar
#   1. foo
# -- main --

$ gawk --version
-| GNU Awk 5.2.2, API 3.2, PMA Avon 8-g1, (GNU MPFR 4.1.0, GNU MP 6.2.1)
-| Copyright (C) 1989, 1991-2023 Free Software Foundation.
…

$ truncate -s 4G data.pma

$ chmod 0600 data.pma

$ GAWK_PERSIST_FILE=data.pma gawk 'BEGIN { print ++i }'
1

$ GAWK_PERSIST_FILE=data.pma gawk 'BEGIN { print ++i }'
2
$ GAWK_PERSIST_FILE=data.pma gawk 'BEGIN { print ++i }'
3

printf("%s", gettext("Don't Panic!\n"));

/* In the standard header file: */
#define _(str) gettext(str)
/* In the program text: */
printf("%s", _("Don't Panic!\n"));

BEGIN {
    TEXTDOMAIN = "guide"
    …
}

print _"hello, world"
x = _"you goofed"
printf(_"Number of users is %d\n", nusers)

if (groggy)
    message = dcgettext("%d customers disturbing me\n", "adminprog")
else
    message = dcgettext("enjoying %d customers\n", "adminprog")
printf(message, ncustomers)

if (groggy)
    message = dcngettext("%d customer disturbing me\n",
                         "%d customers disturbing me\n",
                         ncustomers, "adminprog")
else
    message = dcngettext("enjoying %d customer\n",
                         "enjoying %d customers\n",
                         ncustomers, "adminprog")
printf(message, ncustomers)

BEGIN {
   TEXTDOMAIN = "guide"   # our text domain
   if (Testing) {
       # where to find our files
       bindtextdomain("testdir")
       # joe is in charge of adminprog
       bindtextdomain("../joe/testdir", "adminprog")
   }
   …
}

gawk --gen-pot -f guide.awk > guide.pot

# the leading underscore marks the format string as translatable
printf(_"String `%s' has %d characters\n",
          string, length(string))

"%d Zeichen lang ist die Zeichenkette `%s'\n"

"%2$d Zeichen lang ist die Zeichenkette `%1$s'\n"

$ gawk 'BEGIN {
>     string = "Don\47t Panic"
>     printf "%2$d characters live in \"%1$s\"\n",
>                         string, length(string)
> }'
-| 11 characters live in "Don't Panic"

$ gawk 'BEGIN {
>    printf("%*.*s\n", 10, 20, "hello")
>    printf("%3$*2$.*1$s\n", 20, 10, "hello")
> }'
-|      hello
-|      hello

$ gawk 'BEGIN { printf "%d %3$s\n", 1, 2, "hi" }'
error→ gawk: cmd. line:1: fatal: must use `count$' on all formats or none

BEGIN {
    # text domain for the marked strings in this program
    TEXTDOMAIN = "guide"
    # when testing, take the translations from a test directory
    if (Test_Guide)   # set with -v
        bindtextdomain("/test/guide/messages")
    print _"don't panic!"       # leading _ marks it for translation
}

# bindtextdomain --- stand-in that does no binding at all: it
# ignores `domain' and returns `dir' unchanged
function bindtextdomain(dir, domain)
{
    return dir
}
# dcgettext --- stand-in that does no translation: it ignores
# `domain' and `category' and returns `string' unchanged
function dcgettext(string, domain, category)
{
    return string
}
# dcngettext --- stand-in that does no translation: it returns
# `string1' when `number' is 1 and `string2' otherwise; `domain'
# and `category' are ignored
function dcngettext(string1, string2, number, domain, category)
{
    if (number == 1)
        return string1
    return string2
}

BEGIN {
    TEXTDOMAIN = "guide"     # text domain for the marked strings
    bindtextdomain(".")  # for testing
    print _"Don't Panic"     # leading _ marks it for translation
    print _"The Answer Is", 42    # only the string is marked
    print "Pardon me, Zaphod who?"     # no leading _: not marked
}

$ gawk --gen-pot -f guide.awk > guide.pot

#: guide.awk:4
msgid "Don't Panic"
msgstr ""
#: guide.awk:5
msgid "The Answer Is"
msgstr ""

$ cp guide.pot guide-mellow.po
Add translations to guide-mellow.po …

#: guide.awk:4
msgid "Don't Panic"
msgstr "Hey man, relax!"
#: guide.awk:5
msgid "The Answer Is"
msgstr "Like, the scoop is"

$ echo $LANGUAGE
-| en_US.UTF-8

$ mkdir en_US.UTF-8 en_US.UTF-8/LC_MESSAGES

$ msgfmt guide-mellow.po -o en_US.UTF-8/LC_MESSAGES/guide.mo

$ gawk -f guide.awk
-| Hey man, relax!
-| Like, the scoop is 42
-| Pardon me, Zaphod who?

$ gawk --posix -f guide.awk -f libintl.awk
-| Don't Panic
-| The Answer Is 42
-| Pardon me, Zaphod who?

$ gawk -D -f getopt.awk -f join.awk -f uniq.awk -- -1 inputfile

gawk>

awk is a wonderful program!
gawk is a wonderful program!

clast = join(alast, fcount+1, n)
cline = join(aline, fcount+1, m)

clast = join(alast, fcount, n)
cline = join(aline, fcount, m)

gawk> b are_equal
-| Breakpoint 1 set at file `awklib/eg/prog/uniq.awk', line 63

gawk> r
-| Starting program:
-| Stopping in Rule ...
-| Breakpoint 1, are_equal(n, m, clast, cline, alast, aline)
         at `awklib/eg/prog/uniq.awk':63
-| 63          if (fcount == 0 && charcount == 0)
gawk>

gawk> bt
-| #0  are_equal(n, m, clast, cline, alast, aline)
         at `awklib/eg/prog/uniq.awk':68
-| #1  in main() at `awklib/eg/prog/uniq.awk':88

gawk> p n
-| n = untyped variable

gawk> p $0
-| $0 = "gawk is a wonderful program!"

gawk> p NR
-| NR = 2

NR == 1 {
    last = $0
    next
}

gawk> p last
-| last = "awk is a wonderful program!"

gawk> n
-| 66          if (fcount > 0) {

gawk> n
-| 67              n = split(last, alast)
gawk> n
-| 68              m = split($0, aline)

gawk> p n m alast aline
-| n = 5
-| m = untyped variable
-| alast = array, 5 elements
-| aline = untyped variable

gawk> p alast[0]
-| "0" not in array `alast'

gawk> p alast[1]
-| alast["1"] = "awk"

gawk> p @alast
-| alast["1"] = "awk"
-| alast["2"] = "is"
-| alast["3"] = "a"
-| alast["4"] = "wonderful"
-| alast["5"] = "program!"

gawk> n
-| 69              clast = join(alast, fcount, n)
gawk> n
-| 70              cline = join(aline, fcount, m)

gawk> p cline clast
-| cline = "gawk is a wonderful program!"
-| clast = "awk is a wonderful program!"

gawk> q
-| The program is running. Exit anyway (y/n)? y

clast = join(alast, fcount+1, n)
cline = join(aline, fcount+1, m)

gawk> commands
> silent
> printf "A silent breakpoint; i = %d\n", i
> info locals
> set i = 10
> continue
> end
gawk>

gawk> display x
-| 10: x = 1

gawk> print $3

gawk> print @a

gawk> dump
-|        # BEGIN
-|
-| [  1:0xfcd340] Op_rule           : [in_rule = BEGIN] [source_file = brini.awk]

-| [  1:0xfcc240] Op_push_i         : "~" [MALLOC|STRING|STRCUR]
-| [  1:0xfcc2a0] Op_push_i         : "~" [MALLOC|STRING|STRCUR]
-| [  1:0xfcc280] Op_match          :
-| [  1:0xfcc1e0] Op_store_var      : O
-| [  1:0xfcc2e0] Op_push_i         : "==" [MALLOC|STRING|STRCUR]
-| [  1:0xfcc340] Op_push_i         : "==" [MALLOC|STRING|STRCUR]
-| [  1:0xfcc320] Op_equal          :
-| [  1:0xfcc200] Op_store_var      : o
-| [  1:0xfcc380] Op_push           : o
-| [  1:0xfcc360] Op_plus_i         : 0 [MALLOC|NUMCUR|NUMBER]
-| [  1:0xfcc220] Op_push_lhs       : o [do_reference = true]
-| [  1:0xfcc300] Op_assign_plus    :
-| [   :0xfcc2c0] Op_pop            :
-| [  1:0xfcc400] Op_push           : O
-| [  1:0xfcc420] Op_push_i         : "" [MALLOC|STRING|STRCUR]
-| [   :0xfcc4a0] Op_no_op          :
-| [  1:0xfcc480] Op_push           : O
-| [   :0xfcc4c0] Op_concat         : [expr_count = 3] [concat_flag = 0]
-| [  1:0xfcc3c0] Op_store_var      : x
-| [  1:0xfcc440] Op_push_lhs       : X [do_reference = true]
-| [  1:0xfcc3a0] Op_postincrement  :
-| [  1:0xfcc4e0] Op_push           : x
-| [  1:0xfcc540] Op_push           : o
-| [  1:0xfcc500] Op_plus           :
-| [  1:0xfcc580] Op_push           : o
-| [  1:0xfcc560] Op_plus           :
-| [  1:0xfcc460] Op_leq            :
-| [   :0xfcc5c0] Op_jmp_false      : [target_jmp = 0xfcc5e0]
-| [  1:0xfcc600] Op_push_i         : "%c" [MALLOC|STRING|STRCUR]
-| [   :0xfcc660] Op_no_op          :
-| [  1:0xfcc520] Op_assign_concat  : c
-| [   :0xfcc620] Op_jmp            : [target_jmp = 0xfcc440]
…
-| [     2:0xfcc5a0] Op_K_printf         : [expr_count = 17] [redir_type = ""]
-| [      :0xfcc140] Op_no_op            :
-| [      :0xfcc1c0] Op_atexit           :
-| [      :0xfcc640] Op_stop             :
-| [      :0xfcc180] Op_no_op            :
-| [      :0xfcd150] Op_after_beginfile  :

-| [      :0xfcc160] Op_no_op            :
-| [      :0xfcc1a0] Op_after_endfile    :
gawk>

# Keep the program in a private temporary file.  mktemp chooses a
# name that cannot be predicted, and the trap removes the file even
# if the script is cut short before reaching the rm below.
script=$(mktemp) || exit 1
trap 'rm -f -- "$script"' EXIT
cat << \EOF > "$script"
…                                  Your program here
EOF
gawk -D -f "$script"
rm -f -- "$script"

@namespace "passwd"
BEGIN { … }
…

@namespace "example"
function gsub(str, pat, result) { … }

# This rule comes before any @namespace directive, so the call
# below is to the built-in systime().
BEGIN {
    print "in awk namespace, systime() =", systime()
}
@namespace "testing"
# Inside the "testing" namespace this defines testing::systime();
# the awk:: prefix reaches the built-in function of the same name.
function systime()
{
    print "in testing namespace, systime() =", awk::systime()
}
# still inside "testing": this calls the function defined just above
BEGIN {
    systime()
}

$ gawk -f systime.awk
-| in awk namespace, systime() = 1500488503
-| in testing namespace, systime() = 1500488503

@namespace "awk"          This is the default namespace
BEGIN {
    Title = "My Report"   Qualified name is awk::Title
}
@namespace "report"       Now in report namespace
function compute()        This is really report::compute()
{
    print awk::Title      But would be SYMTAB["Title"]
    …
}

# ns_passwd.awk --- access password file information
@namespace "passwd"
BEGIN {
    # tailor this to suit your system
    # Init() appends "pwcat" directly to this value to form the
    # command it runs, so the trailing slash is required
    Awklib = "/usr/local/libexec/awk/"
}
# Init --- on the first call, run the pwcat program and load every
# line it prints into Byname (indexed by field 1), Byuid (indexed by
# field 3), and Bycount (indexed by order read).  Later calls return
# at once.  FS, RS, and $0 are saved on entry and put back on exit.
function Init(    oldfs, oldrs, olddol0, pwcat, using_fw, using_fpat)
{
    if (Inited)
        return
    # save everything the read loop below is going to change
    oldfs = FS
    oldrs = RS
    olddol0 = $0
    # remember whether FIELDWIDTHS or FPAT was driving field splitting
    using_fw = (PROCINFO["FS"] == "FIELDWIDTHS")
    using_fpat = (PROCINFO["FS"] == "FPAT")
    # the data is read as colon-separated fields, one record per line
    FS = ":"
    RS = "\n"
    pwcat = Awklib "pwcat"
    while ((pwcat | getline) > 0) {
        Byname[$1] = $0
        Byuid[$3] = $0
        Bycount[++Total] = $0
    }
    close(pwcat)
    Count = 0
    Inited = 1
    FS = oldfs
    # NOTE(review): assigning one of these variables to itself
    # presumably makes it the active splitting method again, since
    # the FS assignments above switched it off --- confirm
    if (using_fw)
        FIELDWIDTHS = FIELDWIDTHS
    else if (using_fpat)
        FPAT = FPAT
    RS = oldrs
    # restore the caller's record last, after the splitting
    # variables are back in place
    $0 = olddol0
}
# awk::getpwnam --- return the saved line whose first field is
# `name'; the awk:: prefix places the function in the awk namespace
function awk::getpwnam(name)
{
    Init()
    return Byname[name]
}
# awk::getpwuid --- return the saved line whose third field is `uid'
function awk::getpwuid(uid)
{
    Init()
    return Byuid[uid]
}
# awk::getpwent --- return the next saved line, in the order the
# lines were read, or "" once all of them have been returned
function awk::getpwent()
{
    Init()
    if (Count >= Total)
        return ""
    return Bycount[++Count]
}
# awk::endpwent --- reset the position used by getpwent(), so that
# its next call returns the first saved line again
function awk::endpwent()
{
    Count = 0
}

# print every entry that getpwent() returns, stopping at the first
# empty result
BEGIN {
    for (p = getpwent(); p != ""; p = getpwent())
        print p
}

$ gawk -f ns_passwd.awk -f testpasswd.awk
-| root:x:0:0:root:/root:/bin/bash
-| daemon:x:1:1:daemon:/usr/sbin:/usr/sbin/nologin
-| bin:x:2:2:bin:/bin:/usr/sbin/nologin
-| sys:x:3:3:sys:/dev:/usr/sbin/nologin
…

prec = 3.322 * dps

$ gawk --version
-| GNU Awk 5.2.1, API 3.2, PMA Avon 8-g1, (GNU MPFR 4.1.0, GNU MP 6.2.1)
-| Copyright (C) 1989, 1991-2022 Free Software Foundation.
…

x = 0.875             # 1/2 + 1/4 + 1/8
y = 0.425

$ gawk 'BEGIN { x = 0.875; y = 0.425
>               printf("%0.17g, %0.17g\n", x, y) }'
-| 0.875, 0.42499999999999999

$ gawk 'BEGIN { print (0.1 + 12.2 == 12.3) }'
-| 0

delta = 0.00001                 # for example
difference = abs(a - b)         # subtract the two values
if (difference < delta)
    # all ok
else
    # not ok

# Print a sequence of values n * x.  Each pass doubles n and replaces
# x by (sqrt(x * x + 1) - 1) / x, starting from x = 1 / sqrt(3), n = 6.
BEGIN {
    x = 1.0 / sqrt(3.0)
    n = 6
    for (i = 1; i < 30; i++) {
        n = n * 2.0
        # this division is the one that fails once x has become zero
        x = (sqrt(x * x + 1) - 1) / x
        printf("%.15f\n", n * x)
    }
}

$ gawk -f pi.awk
-| 3.215390309173475
-| 3.159659942097510
-| 3.146086215131467
-| 3.142714599645573
…
-| 3.224515243534819
-| 2.791117213058638
-| 0.000000000000000
error→ gawk: pi.awk:6: fatal: division by zero attempted

$ gawk 'BEGIN {
>   for (d = 1.1; d <= 1.5; d += 0.1)    # loop five times (?)
>       i++
>   print i
> }'
-| 4

$ gawk -M -v PREC=56 'BEGIN { print (0.1 + 12.2 == 12.3) }'
-| 1

$ gawk -M -v PREC=201 'BEGIN { print (0.1 + 12.2 == 12.3) }'
-| 0

$ gawk 'BEGIN {
>   for (d = 1.1; d <= 1.5; d += 0.1)    # loop five times (?)
>       i++
>   print i
> }'
-| 4

(sqrt(x * x + 1) - 1) / x ≡ x / (sqrt(x * x + 1) + 1)

$ gawk -f pi2.awk
-| 3.215390309173473
-| 3.159659942097501
-| 3.146086215131436
-| 3.142714599645370
-| 3.141873049979825
…
-| 3.141592653589797
-| 3.141592653589797

$ gawk -M -v PREC=100 'BEGIN { x = 1.0e-400; print x + 0
>   PREC = "double"; print x + 0 }'
-| 1e-400
-| 0

$ gawk -M 'BEGIN { PREC = 113; printf("%0.25f\n", 0.1) }'
-| 0.1000000000000000055511151
$ gawk -M -v PREC=113 'BEGIN { printf("%0.25f\n", 0.1) }'
-| 0.1000000000000000000000000
$ gawk -M 'BEGIN { PREC = 113; printf("%0.25f\n", "0.1") }'
-| 0.1000000000000000000000000
$ gawk -M 'BEGIN { PREC = 113; printf("%0.25f\n", 1/10) }'
-| 0.1000000000000000000000000

# Print nine values, -3.5 through 4.5 in steps of 1, each next to
# the same value formatted with no digits after the decimal point.
BEGIN {
    x = -4.5
    i = 1
    while (i < 10) {
        x += 1.0
        printf("%4.1f => %2.0f\n", x, x)
        i++
    }
}

-3.5 => -4
-2.5 => -2
-1.5 => -2
-0.5 => 0
 0.5 => 0
 1.5 => 2
 2.5 => 2
 3.5 => 4
 4.5 => 4

BEGIN {
    pi = 3.1416
    OFMT = "%.f"        # Print value as integer
    print pi            # ROUNDMODE = "N" by default.
    ROUNDMODE = "U"     # Now change ROUNDMODE
    print pi
}

$ gawk -M -f roundmode.awk
-| 3
-| 4

$ gawk -M 'BEGIN {
>   x = 5^4^3^2
>   print "number of digits =", length(x)
>   print substr(x, 1, 20), "...", substr(x, length(x) - 19, 20)
> }'
-| number of digits = 183231
-| 62060698786608744707 ... 92256259918212890625

$ gawk -M 'BEGIN {
>   s = 2.0
>   for (i = 1; i <= 7; i++)
>       s = s * (s - 1) + 1
>   print s
> }'
-| 113423713055421845118910464

gawk -M 'BEGIN { n = 13; print (n + 0.0) % 2.0 }'

gawk -M 'BEGIN { n = 13.0; print n % 2.0 }'

gawk -M 'BEGIN { n = 13; print n % 2 }'

# adequate_math_precision --- return true if we have enough bits
# adequate_math_precision --- return 1 if adding 1 / 2^(n-1) to 1
# gives a result different from 1, and 0 if the sum is still 1
function adequate_math_precision(n)
{
    if ((1+(1/(2^(n-1)))) == 1)
        return 0
    return 1
}

# Ask for the precision this program needs and stop with an error
# message if adequate_math_precision() reports that it is not there.
BEGIN {
    # How many bits of mantissa precision are required
    # for this program to function properly?
    fpbits = 123
    # We hope that we were invoked with MPFR enabled. If so, the
    # following statement should configure calculations to our desired
    # precision.
    PREC = fpbits
    if (! adequate_math_precision(fpbits)) {
        # one print statement: the two strings are concatenated
        print("Error: insufficient computation precision available.\n" \
              "Try again with the -M argument?") > "/dev/stderr"
        # Note: you may need to set a flag here to bail out of END rules
        exit 1
    }
}

$ echo nanny | gawk --posix '{ print $1 + 0 }'
-| nan
$ echo 0xDeadBeef | gawk --posix '{ print $1 + 0 }'
-| 3735928559

$ echo nanny | gawk '{ print $1 + 0 }'
-| 0
$ echo +nan | gawk '{ print $1 + 0 }'
-| +nan
$ echo 0xDeadBeef | gawk '{ print $1 + 0 }'
-| 0

int plugin_is_GPL_compatible;

awk_value_t result;
char *message;
const char greet[] = "Don't Panic!";
emalloc(message, char *, sizeof(greet), "myfunc");
strcpy(message, greet);
make_malloced_string(message, strlen(message), & result);

char *p = gawk_malloc(42);      p ``owns'' the memory
char *q = p;
p = NULL;                       now q ``owns'' it

mpz_t part1, part2, answer;             declare local values
mpz_set_si(part1, 21);                  do some computations
mpz_set_si(part2, 21);
mpz_add(answer, part1, part2);
…
/* assume that result is a parameter of type (awk_value_t *). */
make_number_mpz(answer, & result);      set it with final GMP value
mpz_clear(part1);                       release intermediate values
mpz_clear(part2);
return result;                          value in answer managed by gawk

/*
 * Describes one extension function: its name, the C function that
 * implements it, and the bounds on its argument count.
 */
typedef struct awk_ext_func {
    const char *name;       /* name of the function */
    /* the implementation: it is handed the actual argument count,
       a place for the result, and a pointer to this structure */
    awk_value_t *(*const function)(int num_actual_args,
                                   awk_value_t *result,
                                   struct awk_ext_func *finfo);
    const size_t max_expected_args;     /* most arguments expected */
    const size_t min_required_args;     /* fewest arguments required */
    /* NOTE(review): presumably turns off lint warnings about the
       argument count --- confirm against gawkapi.h */
    awk_bool_t suppress_lint;
    void *data;        /* opaque pointer to any extra state */
} awk_ext_func_t;

/*
 * Describes one input parser: a name plus two functions that are
 * each handed an input buffer.
 */
typedef struct awk_input_parser {
    const char *name;   /* name of parser */
    /* NOTE(review): presumably returns true when this parser wants
       to handle the file described by iobuf --- confirm */
    awk_bool_t (*can_take_file)(const awk_input_buf_t *iobuf);
    /* unlike can_take_file, this one may modify iobuf (non-const) */
    awk_bool_t (*take_control_of)(awk_input_buf_t *iobuf);
    awk_const struct awk_input_parser *awk_const next;   /* for gawk */
} awk_input_parser_t;

/*
 * Describes one input source: the file's name and descriptor, stat
 * information, private data for an input parser, and the functions
 * used to read records from the file and to close it.
 */
typedef struct awk_input {
    const char *name;       /* filename */
    int fd;                 /* file descriptor */
/* NOTE(review): presumably the fd value meaning "no descriptor" --- confirm */
#define INVALID_HANDLE (-1)
    void *opaque;           /* private data for input parsers */
    /* function that fetches a record; NOTE(review): rt_start and
       rt_len presumably describe the record terminator --- confirm */
    int (*get_record)(char **out, struct awk_input *iobuf,
                      int *errcode, char **rt_start, size_t *rt_len,
                      const awk_fieldwidth_info_t **field_width);
    ssize_t (*read_func)();         /* declared without a prototype */
    void (*close_func)(struct awk_input *iobuf);
    struct stat sbuf;       /* stat buf */
} awk_input_buf_t;

/*
 * awk_fieldwidth_info_t --- layout of the fields in one record, given
 * as a skip distance and a length per field.  fields[] is declared
 * with a single element but is meant to hold nf entries.
 */
typedef struct {
        awk_bool_t     use_chars; /* false ==> use bytes */
        size_t         nf;        /* number of fields in record (NF) */
        struct awk_field_info {
                size_t skip;      /* amount to skip before field starts */
                size_t len;       /* length of field */
        } fields[1];              /* actual dimension should be nf */
} awk_fieldwidth_info_t;

/*
 * awk_output_wrapper --- registration record for an output wrapper:
 * a name plus two callbacks that each receive an output buffer.
 */
typedef struct awk_output_wrapper {
    const char *name;   /* name of the wrapper */
    /* NOTE(review): presumably asks whether this wrapper wants the
       output described by outbuf -- confirm against gawkapi.h */
    awk_bool_t (*can_take_file)(const awk_output_buf_t *outbuf);
    /* NOTE(review): presumably lets the wrapper fill in outbuf once
       it has accepted the output -- confirm */
    awk_bool_t (*take_control_of)(awk_output_buf_t *outbuf);
    awk_const struct awk_output_wrapper *awk_const next;  /* for gawk */
} awk_output_wrapper_t;

/*
 * awk_output_buf --- state of one output destination: its name and
 * open mode, the stdio stream, and four I/O hooks.  Every hook takes
 * the stream plus the opaque pointer stored here.
 */
typedef struct awk_output_buf {
    const char *name;   /* name of output file */
    const char *mode;   /* mode argument to fopen */
    FILE *fp;           /* stdio file pointer */
    awk_bool_t redirected;  /* true if a wrapper is active */
    void *opaque;       /* for use by output wrapper */
    /* hooks with fwrite/fflush/ferror/fclose-like signatures */
    size_t (*gawk_fwrite)(const void *buf, size_t size, size_t count,
                FILE *fp, void *opaque);
    int (*gawk_fflush)(FILE *fp, void *opaque);
    int (*gawk_ferror)(FILE *fp, void *opaque);
    int (*gawk_fclose)(FILE *fp, void *opaque);
} awk_output_buf_t;

/*
 * awk_two_way_processor --- registration record for a two-way
 * processor: a name plus two callbacks keyed on a name string.
 */
typedef struct awk_two_way_processor {
    const char *name;   /* name of the two-way processor */
    /* NOTE(review): presumably asks whether this processor handles the
       two-way target called name -- confirm against gawkapi.h */
    awk_bool_t (*can_take_two_way)(const char *name);
    /* receives both an input and an output buffer for the same name */
    awk_bool_t (*take_control_of)(const char *name,
                                  awk_input_buf_t *inbuf,
                                  awk_output_buf_t *outbuf);
    awk_const struct awk_two_way_processor *awk_const next;  /* for gawk */
} awk_two_way_processor_t;

/*
 * do_magic --- bump MAGIC_VAR by 42 when it can be fetched as a number
 * and some_condition() accepts its current value.
 *
 * nargs is not examined.  The function's awk-level return value,
 * stored in result, is always 0.
 */
static awk_value_t *
do_magic(int nargs, awk_value_t *result)
{
    awk_value_t magic;

    /* nothing to do unless the variable is available as a number */
    if (! sym_lookup("MAGIC_VAR", AWK_NUMBER, & magic))
        return make_number(0.0, result);

    /* ... and its current value passes the test */
    if (! some_condition(magic.num_value))
        return make_number(0.0, result);

    magic.num_value += 42;
    sym_update("MAGIC_VAR", & magic);

    return make_number(0.0, result);
}

static awk_scalar_t magic_var_cookie;    /* cookie for MAGIC_VAR */
/*
 * my_extension_init --- create MAGIC_VAR with the value 42, then look
 * it up again as a scalar and keep the resulting cookie in
 * magic_var_cookie.  The results of sym_update() and sym_lookup() are
 * not checked here.
 */
static void
my_extension_init()
{
    awk_value_t value;

    /* install initial value */
    sym_update("MAGIC_VAR", make_number(42.0, & value));
    /* get the cookie */
    sym_lookup("MAGIC_VAR", AWK_SCALAR, & value);
    /* save the cookie */
    magic_var_cookie = value.scalar_cookie;
    …
}

/*  do_magic --- do something really great */
/*
 * This variant reaches MAGIC_VAR through magic_var_cookie rather than
 * by name: when the value can be fetched as a number and
 * some_condition() accepts it, the value is raised by 42 and stored
 * back.  The awk-level return value placed in result is always 0.
 */
static awk_value_t *
do_magic(int nargs, awk_value_t *result)
{
    awk_value_t value;
    if (   sym_lookup_scalar(magic_var_cookie, AWK_NUMBER, & value)
        && some_condition(value.num_value)) {
            value.num_value += 42;
            sym_update_scalar(magic_var_cookie, & value);
    }
    …
    return make_number(0.0, result);
}

static awk_value_cookie_t answer_cookie;  /* static value cookie */
/*
 * my_extension_init --- in addition to the elided earlier setup, wrap
 * long_string in an awk value and turn that value into the cookie
 * stored in answer_cookie.  The result of create_value() is not
 * checked here.
 */
static void
my_extension_init()
{
    awk_value_t value;
    char *long_string;
    size_t long_string_len;
    /* code from earlier */
    …
    /* … fill in long_string and long_string_len … */
    make_malloced_string(long_string, long_string_len, & value);
    create_value(& value, & answer_cookie);    /* create cookie */
    …
}

/*
 * do_magic --- assign the value behind answer_cookie to a series of
 * variables (VAR1, VAR2, ..., VAR100).
 *
 * The one awk_value_t declared here, new_value, is set up to refer to
 * the value through its cookie and is then passed to every
 * sym_update() call.  (The visible statements previously used an
 * undeclared `value' instead of the declared `new_value'.)
 */
static awk_value_t *
do_magic(int nargs, awk_value_t *result)
{
    awk_value_t new_value;
    …    /* as earlier */
    /* make new_value refer to the value via its cookie */
    new_value.val_type = AWK_VALUE_COOKIE;
    new_value.value_cookie = answer_cookie;
    sym_update("VAR1", & new_value);
    sym_update("VAR2", & new_value);
    …
    sym_update("VAR100", & new_value);
    …
}

@load "testext"
# Build a five-element array, have the extension dump it and delete
# index "3", then report the return value and whether the element
# is really gone.
BEGIN {
    n = split("blacky rusty sophie raincloud lucky", pets)
    printf "pets has %d elements\n", length(pets)
    ret = dump_array_and_delete("pets", "3")
    printf "dump_array_and_delete(pets) returned %d\n", ret
    if (! ("3" in pets))
        printf "dump_array_and_delete() did remove index \"3\"!\n"
    else
        printf "dump_array_and_delete() did NOT remove index \"3\"!\n"
    print ""
}

/*
 * dump_array_and_delete --- print every element of the array whose
 * name is given by the first awk argument, and mark for deletion the
 * element whose index equals the second awk argument.
 *
 * nargs:  number of arguments passed from awk; must be exactly 2.
 * result: slot for the awk-level return value; must not be NULL.
 *
 * Returns result, set to 1 when every step succeeded and to 0
 * otherwise.  Progress and failure messages are written to stdout.
 *
 * Once the array has been flattened it is always handed back with
 * release_flattened_array(), including on the error paths.
 */
static awk_value_t *
dump_array_and_delete(int nargs, awk_value_t *result)
{
    awk_value_t value, value2, value3;
    awk_flat_array_t *flat_array;
    size_t count;
    size_t i;       /* unsigned, like the element count it is compared to */
    char *name;
    int ok = 0;     /* becomes 1 once dumping/marking has completed */

    assert(result != NULL);
    make_number(0.0, result);   /* pessimistic default */

    if (nargs != 2) {
        printf("dump_array_and_delete: nargs not right "
               "(%d should be 2)\n", nargs);
        goto out;
    }

    /* get argument named array as flat array and print it */
    if (get_argument(0, AWK_STRING, & value)) {
        name = value.str_value.str;
        if (sym_lookup(name, AWK_ARRAY, & value2))
            printf("dump_array_and_delete: sym_lookup of %s passed\n",
                   name);
        else {
            printf("dump_array_and_delete: sym_lookup of %s failed\n",
                   name);
            goto out;
        }
    } else {
        printf("dump_array_and_delete: get_argument(0) failed\n");
        goto out;
    }

    if (! get_element_count(value2.array_cookie, & count)) {
        printf("dump_array_and_delete: get_element_count failed\n");
        goto out;
    }
    printf("dump_array_and_delete: incoming size is %lu\n",
           (unsigned long) count);

    if (! flatten_array_typed(value2.array_cookie, & flat_array,
                              AWK_STRING, AWK_UNDEFINED)) {
        printf("dump_array_and_delete: could not flatten array\n");
        goto out;
    }

    /* from here on, failures must still release the flattened array */
    if (flat_array->count != count) {
        printf("dump_array_and_delete: flat_array->count (%lu)"
               " != count (%lu)\n",
                (unsigned long) flat_array->count,
                (unsigned long) count);
        goto release_array;
    }

    if (! get_argument(1, AWK_STRING, & value3)) {
        printf("dump_array_and_delete: get_argument(1) failed\n");
        goto release_array;
    }

    for (i = 0; i < flat_array->count; i++) {
        printf("\t%s[\"%.*s\"] = %s\n",
            name,
            (int) flat_array->elements[i].index.str_value.len,
            flat_array->elements[i].index.str_value.str,
            valrep2str(& flat_array->elements[i].value));
        /* the element is only flagged here; see the release call below */
        if (strcmp(value3.str_value.str,
                   flat_array->elements[i].index.str_value.str) == 0) {
            flat_array->elements[i].flags |= AWK_ELEMENT_DELETE;
            printf("dump_array_and_delete: marking element \"%s\" "
                   "for deletion\n",
                flat_array->elements[i].index.str_value.str);
        }
    }
    ok = 1;

release_array:
    if (! release_flattened_array(value2.array_cookie, flat_array)) {
        printf("dump_array_and_delete: could not release flattened array\n");
        goto out;
    }

    if (ok)
        make_number(1.0, result);
out:
    return result;
}

pets has 5 elements
dump_array_and_delete: sym_lookup of pets passed
dump_array_and_delete: incoming size is 5
        pets["1"] = "blacky"
        pets["2"] = "rusty"
        pets["3"] = "sophie"
dump_array_and_delete: marking element "3" for deletion
        pets["4"] = "raincloud"
        pets["5"] = "lucky"
dump_array_and_delete(pets) returned 1
dump_array_and_delete() did remove index "3"!

/* Create a new array and install it under the awk name "array". */
awk_value_t val;
awk_array_t new_array;
new_array = create_array();
val.val_type = AWK_ARRAY;
val.array_cookie = new_array;
/* install array in the symbol table */
sym_update("array", & val);
/* re-read the cookie from val after sym_update() before using it */
new_array = val.array_cookie;    /* YOU MUST DO THIS */

/*
 * create_new_array --- create a named array
 *
 * Installs a new array as the awk variable "new_array" and fills it:
 *     new_array["hello"]           = "world"
 *     new_array["answer"]          = 42
 *     new_array["subarray"]["foo"] = "bar"
 *
 * Failures are reported on stdout under this function's own name (the
 * element-setting messages previously named a different function).
 * A failed set_array_element() ends the function early; a failed
 * sym_update() is reported and population continues, as before.
 */
static void
create_new_array()
{
    awk_array_t a_cookie;
    awk_array_t subarray;
    awk_value_t index, value;

    a_cookie = create_array();
    value.val_type = AWK_ARRAY;
    value.array_cookie = a_cookie;
    if (! sym_update("new_array", & value))
        printf("create_new_array: sym_update(\"new_array\") failed!\n");
    /* re-read the cookie from value after installing the array */
    a_cookie = value.array_cookie;

    (void) make_const_string("hello", 5, & index);
    (void) make_const_string("world", 5, & value);
    if (! set_array_element(a_cookie, & index, & value)) {
        printf("create_new_array: set_array_element failed\n");
        return;
    }

    (void) make_const_string("answer", 6, & index);
    (void) make_number(42.0, & value);
    if (! set_array_element(a_cookie, & index, & value)) {
        printf("create_new_array: set_array_element failed\n");
        return;
    }

    /* nested array: install it as an element, then re-read its cookie */
    (void) make_const_string("subarray", 8, & index);
    subarray = create_array();
    value.val_type = AWK_ARRAY;
    value.array_cookie = subarray;
    if (! set_array_element(a_cookie, & index, & value)) {
        printf("create_new_array: set_array_element failed\n");
        return;
    }
    subarray = value.array_cookie;

    (void) make_const_string("foo", 3, & index);
    (void) make_const_string("bar", 3, & value);
    if (! set_array_element(subarray, & index, & value)) {
        printf("create_new_array: set_array_element failed\n");
        return;
    }
}

@load "subarray"
# dumparray --- print every element of array, labelling each one with
# name and its subscript; elements that are themselves arrays are
# walked recursively with the subscript appended to the label.
function dumparray(name, array,     key)
{
    for (key in array) {
        if (! isarray(array[key])) {
            printf("%s[\"%s\"] = %s\n", name, key, array[key])
            continue
        }
        dumparray(name "[\"" key "\"]", array[key])
    }
}
BEGIN {
    dumparray("new_array", new_array)
}

$ AWKLIBPATH=$PWD gawk -f subarray.awk
-| new_array["subarray"]["foo"] = bar
-| new_array["hello"] = world
-| new_array["answer"] = 42

/*
 * Refuse to continue unless gawk's API major version equals the one
 * this code was compiled against and gawk's minor version is at least
 * as new.  On mismatch, report both version pairs on stderr and exit
 * with status 1.
 */
if (   api->major_version != GAWK_API_MAJOR_VERSION
    || api->minor_version < GAWK_API_MINOR_VERSION) {
        fprintf(stderr, "foo_extension: version mismatch with gawk!\n");
        fprintf(stderr, "\tmy version (%d, %d), gawk version (%d, %d)\n",
                GAWK_API_MAJOR_VERSION, GAWK_API_MINOR_VERSION,
                api->major_version, api->minor_version);
        exit(1);
}

/* Boilerplate code: */
/*
 * Template for the file-scope declarations of an extension.  Exactly
 * one of the two init_func definitions below (EITHER / OR) is meant to
 * be kept.
 */
int plugin_is_GPL_compatible;
static gawk_api_t *const api;

static awk_ext_id_t ext_id;
static const char *ext_version = NULL; /* or … = "some string" */
/* one entry per function: name, C function, max_expected_args,
   min_required_args, suppress_lint, data */
static awk_ext_func_t func_table[] = {
    { "name", do_name, 1, 0, awk_false, NULL },
    /* … */
};
/* EITHER: */
static awk_bool_t (*init_func)(void) = NULL;
/* OR: */
static awk_bool_t
init_my_extension(void)
{
    …
}
static awk_bool_t (*init_func)(void) = init_my_extension;
/* NOTE(review): the macro's arguments appear to be the function table,
   an identifier for the extension, and a namespace string -- confirm */
dl_load_func(func_table, some_name, "name_space_in_quotes")

@load "filefuncs"
…
newdir = "/home/arnold/funstuff"
ret = chdir(newdir)
if (ret < 0) {
    printf("could not change to %s: %s\n", newdir, ERRNO) > "/dev/stderr"
    exit 1
}
…

# Fetch file information into fdata; on failure report ERRNO on
# standard error and exit with status 1, otherwise print the size.
file = "/home/arnold/.profile"
if ((ret = stat(file, fdata)) < 0) {
    printf "could not stat %s: %s\n", file, ERRNO > "/dev/stderr"
    exit 1
}
printf "size of %s is %d bytes\n", file, fdata["size"]

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <stdio.h>
#include <assert.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "gawkapi.h"
#include "gettext.h"
#define _(msgid)  gettext(msgid)
#define N_(msgid) msgid
#include "gawkfts.h"
#include "stack.h"
static const gawk_api_t *api;    /* for convenience macros to work */
static awk_ext_id_t ext_id;
static awk_bool_t init_filefuncs(void);
static awk_bool_t (*init_func)(void) = init_filefuncs;
static const char *ext_version = "filefuncs extension: version 1.0";
int plugin_is_GPL_compatible;

/*
 * do_chdir --- provide dynamically loaded chdir() function for gawk
 *
 * The single awk argument is fetched as a string and passed to
 * chdir(2).  The awk-level return value is chdir()'s return value, or
 * -1 when the argument cannot be fetched.  When chdir() itself fails,
 * ERRNO is updated from errno.
 */
static awk_value_t *
do_chdir(int nargs, awk_value_t *result, struct awk_ext_func *unused)
{
    awk_value_t dir_arg;
    int status;

    assert(result != NULL);

    /* no usable argument: report failure without touching ERRNO */
    if (! get_argument(0, AWK_STRING, & dir_arg))
        return make_number(-1, result);

    status = chdir(dir_arg.str_value.str);
    if (status < 0)
        update_ERRNO_int(errno);

    return make_number(status, result);
}

/* format_mode --- turn a stat mode field into something readable */
static char *
format_mode(unsigned long fmode)
{
    …
}

/* read_symlink --- read a symbolic link into an allocated buffer.
   … */
static char *
read_symlink(const char *fname, size_t bufsize, ssize_t *linksize)
{
    …
}

/*
 * array_set --- store value in array under the subscript sub.
 *
 * sub is a NUL-terminated C string; it is wrapped in a string value
 * before the element is set.  The outcome of set_array_element() is
 * not reported to the caller.
 */
static void
array_set(awk_array_t array, const char *sub, awk_value_t *value)
{
    awk_value_t subscript;

    (void) make_const_string(sub, strlen(sub), & subscript);
    set_array_element(array, & subscript, value);
}
/*
 * array_set_numeric --- store the number num in array under the
 * subscript sub, by way of array_set().
 */
static void
array_set_numeric(awk_array_t array, const char *sub, double num)
{
    awk_value_t number;

    (void) make_number(num, & number);
    array_set(array, sub, & number);
}

/* fill_stat_array --- do the work to fill an array with stat info */
/*
 * name:  file name; stored under "name" and used to read a symlink.
 * array: destination array; it is cleared first.
 * sbuf:  stat data to copy from.
 *
 * Elements set: name, dev, ino, mode, nlink, uid, gid, size, blocks,
 * atime, mtime, ctime, pmode and type; rdev, major and minor for block
 * and character devices; blksize when HAVE_STRUCT_STAT_ST_BLKSIZE is
 * defined; linkval for symbolic links whose target can be read.
 *
 * Always returns 0.
 */
static int
fill_stat_array(const char *name, awk_array_t array, struct stat *sbuf)
{
    char *pmode;    /* printable mode */
    const char *type = "unknown";
    awk_value_t tmp;
    /* maps the file-type bits of st_mode to the string stored in "type";
       some entries exist only where the platform defines the macro */
    static struct ftype_map {
        unsigned int mask;
        const char *type;
    } ftype_map[] = {
        { S_IFREG, "file" },
        { S_IFBLK, "blockdev" },
        { S_IFCHR, "chardev" },
        { S_IFDIR, "directory" },
#ifdef S_IFSOCK
        { S_IFSOCK, "socket" },
#endif
#ifdef S_IFIFO
        { S_IFIFO, "fifo" },
#endif
#ifdef S_IFLNK
        { S_IFLNK, "symlink" },
#endif
#ifdef S_IFDOOR /* Solaris weirdness */
        { S_IFDOOR, "door" },
#endif
    };
    int j, k;

    /* empty out the array */
    clear_array(array);
    /* fill in the array */
    array_set(array, "name", make_const_string(name, strlen(name),
                                               & tmp));
    /* each stat field below is passed as a double to array_set_numeric() */
    array_set_numeric(array, "dev", sbuf->st_dev);
    array_set_numeric(array, "ino", sbuf->st_ino);
    array_set_numeric(array, "mode", sbuf->st_mode);
    array_set_numeric(array, "nlink", sbuf->st_nlink);
    array_set_numeric(array, "uid", sbuf->st_uid);
    array_set_numeric(array, "gid", sbuf->st_gid);
    array_set_numeric(array, "size", sbuf->st_size);
    array_set_numeric(array, "blocks", sbuf->st_blocks);
    array_set_numeric(array, "atime", sbuf->st_atime);
    array_set_numeric(array, "mtime", sbuf->st_mtime);
    array_set_numeric(array, "ctime", sbuf->st_ctime);
    /* for block and character devices, add rdev,
       major and minor numbers */
    if (S_ISBLK(sbuf->st_mode) || S_ISCHR(sbuf->st_mode)) {
        array_set_numeric(array, "rdev", sbuf->st_rdev);
        array_set_numeric(array, "major", major(sbuf->st_rdev));
        array_set_numeric(array, "minor", minor(sbuf->st_rdev));
    }

#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE
    array_set_numeric(array, "blksize", sbuf->st_blksize);
#endif

    /* printable form of the mode, as produced by format_mode() */
    pmode = format_mode(sbuf->st_mode);
    array_set(array, "pmode", make_const_string(pmode, strlen(pmode),
                                                & tmp));
    /* for symbolic links, add a linkval field */
    if (S_ISLNK(sbuf->st_mode)) {
        char *buf;
        ssize_t linksize;
        /* a NULL result only produces a warning; the other elements
           are still filled in */
        if ((buf = read_symlink(name, sbuf->st_size,
                    & linksize)) != NULL)
            array_set(array, "linkval",
                      make_malloced_string(buf, linksize, & tmp));
        else
            warning(ext_id, _("stat: unable to read symbolic link `%s'"),
                    name);
    }
    /* add a type field */
    type = "unknown";   /* shouldn't happen */
    /* first table entry whose mask equals the S_IFMT bits wins */
    for (j = 0, k = sizeof(ftype_map)/sizeof(ftype_map[0]); j < k; j++) {
        if ((sbuf->st_mode & S_IFMT) == ftype_map[j].mask) {
            type = ftype_map[j].type;
            break;
        }
    }
    array_set(array, "type", make_const_string(type, strlen(type), & tmp));
    return 0;
}

/*
 * do_stat --- provide a stat() function for gawk
 *
 * awk arguments: the file name, the array that receives the results,
 * and an optional third argument.  With exactly three arguments the
 * file is examined with stat(); otherwise lstat() is used.
 *
 * The awk-level return value is -1 for bad parameters, the negative
 * result of the failing call (with ERRNO updated from errno) when the
 * file cannot be examined, and otherwise whatever fill_stat_array()
 * returns.
 */
static awk_value_t *
do_stat(int nargs, awk_value_t *result, struct awk_ext_func *unused)
{
    awk_value_t file_param, array_param;
    struct stat info;
    int status;
    int (*examine)(const char *path, struct stat *sbuf);

    assert(result != NULL);

    /* both the file name and the destination array are mandatory */
    if (! (   get_argument(0, AWK_STRING, & file_param)
           && get_argument(1, AWK_ARRAY, & array_param))) {
        warning(ext_id, _("stat: bad parameters"));
        return make_number(-1, result);
    }

    /* lstat() unless a third argument was supplied */
    examine = (nargs == 3) ? stat : lstat;

    /* the array is emptied even when the file cannot be examined */
    clear_array(array_param.array_cookie);

    status = examine(file_param.str_value.str, & info);
    if (status < 0)
        update_ERRNO_int(errno);
    else
        status = fill_stat_array(file_param.str_value.str,
                                 array_param.array_cookie, & info);

    return make_number(status, result);
}

/* init_filefuncs --- initialization routine */
static awk_bool_t
init_filefuncs(void)
{
    …
}

/*
 * Functions provided by this extension.  Following the member order of
 * awk_ext_func_t, each entry is: name, C function, max_expected_args,
 * min_required_args, suppress_lint, data.  So chdir takes exactly one
 * argument and stat takes two or three.
 */
static awk_ext_func_t func_table[] = {
    { "chdir", do_chdir, 1, 1, awk_false, NULL },
    { "stat",  do_stat, 3, 2, awk_false, NULL },
    …
};

/* define the dl_load() function using the boilerplate macro */
dl_load_func(func_table, filefuncs, "")

$ gcc -fPIC -shared -DHAVE_CONFIG_H -c -O -g -Iidir filefuncs.c
$ gcc -o filefuncs.so -shared filefuncs.o

# file testff.awk
@load "filefuncs"

# show_data --- print every element of the global array data
function show_data(    key)
{
    for (key in data)
        printf("data[\"%s\"] = %s\n", key, data[key])
}

# Exercise chdir() and stat(): change to /tmp and back, then print the
# stat information for this file and for a file named JUNK.
BEGIN {
    # remember where we started
    "pwd" | getline curdir
    close("pwd")

    chdir("/tmp")
    system("pwd")       # shows the directory change took effect
    chdir(curdir)       # return to the starting directory

    print "Info for testff.awk"
    ret = stat("testff.awk", data)
    print "ret =", ret
    show_data()
    print "testff.awk modified:", strftime("%m %d %Y %H:%M:%S", data["mtime"])

    print "\nInfo for JUNK"
    ret = stat("JUNK", data)
    print "ret =", ret
    show_data()
    print "JUNK modified:", strftime("%m %d %Y %H:%M:%S", data["mtime"])
}

$ AWKLIBPATH=$PWD gawk -f testff.awk
-| /tmp
-| Info for testff.awk
-| ret = 0
-| data["blksize"] = 4096
-| data["devbsize"] = 512
-| data["mtime"] = 1412004710
-| data["mode"] = 33204
-| data["type"] = file
-| data["dev"] = 2053
-| data["gid"] = 1000
-| data["ino"] = 10358899
-| data["ctime"] = 1412004710
-| data["blocks"] = 8
-| data["nlink"] = 1
-| data["name"] = testff.awk
-| data["atime"] = 1412004716
-| data["pmode"] = -rw-rw-r--
-| data["size"] = 666
-| data["uid"] = 1000
-| testff.awk modified: 09 29 2014 18:31:50
-|
-| Info for JUNK
-| ret = -1
-| JUNK modified: 01 01 1970 02:00:00

@load "fnmatch"
…
flags = or(FNM["PERIOD"], FNM["NOESCAPE"])
if (fnmatch("*.a", "foo.c", flags) == FNM_NOMATCH)
    print "no match"

@load "fork"
…
if ((pid = fork()) == 0)
    print "hello from the child"
else
    print "hello from the parent"

# inplace --- load and invoke the inplace extension.
@load "inplace"
# To keep a backup copy of each edited file, set inplace::suffix, for
# example to .bak, on the command line or in a BEGIN rule.
# Before gawk had namespaces, this extension took the backup suffix
# from INPLACE_SUFFIX.  That variable is still honored, so code written
# for the previous version continues to work.
# Every filename on the command line is edited in place by default.
# Put an inplace::enable=0 argument before files that should not be
# processed this way, and an inplace::enable=1 argument before later
# files to turn in-place editing back on.
# N.B. inplace::end() is called from the BEGINFILE and END rules so
# that any actions in an ENDFILE rule will be redirected as expected.
@namespace "inplace"

BEGIN {
    enable = 1         # enabled by default
}

BEGINFILE {
    # inplace::suffix wins; otherwise use the older variable
    if (suffix)
        sfx = suffix
    else
        sfx = awk::INPLACE_SUFFIX

    # finish the previous file, if one was being edited
    if (filename != "")
        end(filename, sfx)

    if (! enable)
        filename = ""
    else {
        filename = FILENAME
        begin(filename, sfx)
    }
}

END {
    # finish the last file, if one was being edited
    if (filename != "") {
        if (suffix)
            end(filename, suffix)
        else
            end(filename, awk::INPLACE_SUFFIX)
    }
}

$ gawk -i inplace '{ gsub(/foo/, "bar") }; { print }' file1 file2 file3

$ gawk -i inplace -v inplace::suffix=.bak '{ gsub(/foo/, "bar") }
> { print }' file1 file2 file3

@load "ordchr"
…
printf("The numeric value of 'A' is %d\n", ord("A"))
printf("The string value of 65 is %s\n", chr(65))

@load "readdir"

@load "readdir"
…
BEGIN { FS = "/" }
{ print "file name is", $2 }

@load "revoutput"
BEGIN {
    REVOUT = 1
    print "don't panic" > "/dev/stdout"
}

@load "revtwoway"
BEGIN {
    cmd = "/magic/mirror"
    print "don't panic" |& cmd
    cmd |& getline result
    print result
    close(cmd)
}

@load "rwarray"
…
ret = writea("arraydump.bin", array)
…
ret = reada("arraydump.bin", array)
…
ret = writeall("globalstate.bin")
…
ret = readall("globalstate.bin")

@load "readfile"
…
contents = readfile("/path/to/file");
if (contents == "" && ERRNO != "") {
    print("problem reading file", ERRNO) > "/dev/stderr"
    ...
}

git clone git://git.code.sf.net/p/gawkextlib/code gawkextlib-code

cd .../path/to/gawk/code
./configure --prefix=/tmp/newgawk     Install in /tmp/newgawk for now
make && make check                    Build and check that all is OK
make install                          Install gawk

$ echo something1234abc | gawk-3.1.8 '{ sub("[A-Z]*$", ""); print }'
-| something1234a

wget https://ftp.gnu.org/gnu/gawk/gawk-5.2.2.tar.gz

gzip -d -c gawk-5.2.2.tar.gz | tar -xvpf -

tar -xvpzf gawk-5.2.2.tar.gz

sh ./configure

CC=cc CFLAGS=-g sh ./configure

make

git clone https://git.savannah.gnu.org/r/gawk.git
cd gawk
./bootstrap.sh && ./configure && make && make check

cd doc
make pdf

cd doc
make html

gawk -v BINMODE=2 -v ORS="\r\n" …

gawk -v BINMODE=w -f binmode2.awk …

gawk -v RS="\r\n" -e "BEGIN { BINMODE = 1 }" …

gawk -f binmode1.awk …

tar -xvpzf gawk-5.2.2.tar.gz
cd gawk-5.2.2
./configure
make && make check

$ @[.vms]vmsbuild.com

$ MMS/DESCRIPTION=[.vms]descrip.mms gawk

$ MMK/DESCRIPTION=[.vms]descrip.mms gawk

$ MMS/DESCRIPTION=[.vms]descrip.mms extensions

$ MMK/DESCRIPTION=[.vms]descrip.mms extensions

/name=(as_is,short)
/float=ieee/ieee_mode=denorm_results

#if (__CRTL_VER >= 70200000)
#define _LARGEFILE 1
#endif
#ifdef __CRTL_VER
#if __CRTL_VER >= 80200000
#define _USE_STD_STAT 1
#endif
#endif

$ GAWK :== $disk1:[gnubin]gawk

$ set command gnv$gnu:[vms_bin]gawk_verb.cld

$ LIBRARY/HELP sys$help:helplib [.vms]gawk.hlp

$ HELP GAWK

$ gawk -- "BEGIN {print ""Hello, World!""}"
$ gawk -"W" version
! could also be -"W version" or "-W version"

unix_status = (vms_status .and. %x7f8) / 8

$ sort = "@device:[dir]vms_gawk_sort.com"

$!'f$verify(0,0)'
$ sort := sort
$ define/user sys$input sys$command:
$ sort sys$input: sys$output:

$ tar -xpzvf gawk-X.Y.Z.tar.gz
-| …                                Output omitted
$ cd gawk-X.Y.Z
$ ./configure
-| …                                Output omitted

$ make
-| …                                Output omitted

$ ./gawk -f realprogram.awk realdata > /dev/null

$ echo help | mailx -s request help-gawk-request@gnu.org
$ echo subscribe | mailx -s request help-gawk-request@gnu.org
$ echo unsubscribe | mailx -s request help-gawk-request@gnu.org

git clone https://github.com/onetrueawk/awk bwkawk

wget https://ftp.gnu.org/gnu/gawk/gawk-5.2.2.tar.gz
tar -xvpzf gawk-5.2.2.tar.gz
cd gawk-5.2.2
./configure && make && make check

git clone git://git.savannah.gnu.org/gawk.git

git clone https://git.savannah.gnu.org/r/gawk.git

# The canonical incantation for building GNU software:
./bootstrap.sh && ./configure && make

wget https://ftp.gnu.org/gnu/package/package-x.y.z.tar.gz
tar -xpzvf package-x.y.z.tar.gz
cd package-x.y.z
./configure && make && make check
make install    # as root

wget https://git.savannah.gnu.org/cgit/gawk.git/snapshot/gawk-branchname.tar.gz

<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year>  <name of author>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or (at
your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program.  If not, see https://www.gnu.org/licenses/.

<program>  Copyright (C) <year>  <name of author>
This program comes with ABSOLUTELY NO WARRANTY; for details type ‘show w’.
This is free software, and you are welcome to redistribute it
under certain conditions; type ‘show c’ for details.

Copyright © 2000, 2001, 2002, 2007, 2008 Free Software Foundation, Inc.
https://fsf.org/
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.

  Copyright (C)  YEAR  YOUR NAME.
  Permission is granted to copy, distribute and/or modify this document
  under the terms of the GNU Free Documentation License, Version 1.3
  or any later version published by the Free Software Foundation;
  with no Invariant Sections, no Front-Cover Texts, and no Back-Cover
  Texts.  A copy of the license is included in the section entitled ``GNU
  Free Documentation License''.

    with the Invariant Sections being LIST THEIR TITLES, with
    the Front-Cover Texts being LIST, and with the Back-Cover Texts
    being LIST.

Class	Meaning
`[:alnum:]`	Alphanumeric characters
`[:alpha:]`	Alphabetic characters
`[:blank:]`	Space and TAB characters
`[:cntrl:]`	Control characters
`[:digit:]`	Numeric characters
`[:graph:]`	Characters that are both printable and visible (a space is printable but not visible, whereas an ‘`a`’ is both)
`[:lower:]`	Lowercase alphabetic characters
`[:print:]`	Printable characters (characters that are not control characters)
`[:punct:]`	Punctuation characters (characters that are not letters, digits, control characters, or space characters)
`[:space:]`	Space characters (these are: space, TAB, newline, carriage return, formfeed and vertical tab)
`[:upper:]`	Uppercase alphabetic characters
`[:xdigit:]`	Characters that are hexadecimal digits

Variant	Effect	`awk` / `gawk`
`getline`	Sets `$0`, `NF`, `FNR`, `NR`, and `RT`	`awk`
`getline` `var`	Sets `var`, `FNR`, `NR`, and `RT`	`awk`
`getline <` `file`	Sets `$0`, `NF`, and `RT`	`awk`
`getline var < file`	Sets `var` and `RT`	`awk`
`command` `| getline`	Sets `$0`, `NF`, and `RT`	`awk`
`command` `| getline` `var`	Sets `var` and `RT`	`awk`
`command` `|& getline`	Sets `$0`, `NF`, and `RT`	`gawk`
`command` `|& getline` `var`	Sets `var` and `RT`	`gawk`

Value of `RS`	Records are split on …	`awk` / `gawk`
Any single character	That character	`awk`
The empty string (`""`)	Runs of two or more newlines	`awk`
A regexp	Text that matches the regexp	`gawk`

Field separator value	Fields are split …	`awk` / `gawk`
`FS == " "`	On runs of whitespace	`awk`
`FS == any single character`	On that character	`awk`
`FS == regexp`	On text matching the regexp	`awk`
`FS == ""`	Such that each individual character is a separate field	`gawk`
`FIELDWIDTHS == list of columns`	Based on character position	`gawk`
`FPAT == regexp`	On the text surrounding text matching the regexp	`gawk`

Situation	Return value from `close()`
Normal exit of command	Command’s exit status
Death by signal of command	256 + number of murderous signal
Death by signal of command with core dump	512 + number of murderous signal
Some kind of error	−1

Feature	Default	`--posix` or `--use-lc-numeric`
`%'g`	Use locale	Use locale
`%g`	Use period	Use locale
Input	Use period	Use locale
`strtonum()`	Use period	Use locale

Operator	Effect
`lvalue` `+=` `increment`	Add `increment` to the value of `lvalue`.
`lvalue` `-=` `decrement`	Subtract `decrement` from the value of `lvalue`.
`lvalue` `*=` `coefficient`	Multiply the value of `lvalue` by `coefficient`.
`lvalue` `/=` `divisor`	Divide the value of `lvalue` by `divisor`.
`lvalue` `%=` `modulus`	Set `lvalue` to its remainder by `modulus`.
`lvalue` `^=` `power`	Raise `lvalue` to the power `power`.
`lvalue` `**=` `power`	Raise `lvalue` to the power `power`. (c.e.)

Expression	Result
`x` `<` `y`	True if `x` is less than `y`
`x` `<=` `y`	True if `x` is less than or equal to `y`
`x` `>` `y`	True if `x` is greater than `y`
`x` `>=` `y`	True if `x` is greater than or equal to `y`
`x` `==` `y`	True if `x` is equal to `y`
`x` `!=` `y`	True if `x` is not equal to `y`
`x` `~` `y`	True if the string `x` matches the regexp denoted by `y`
`x` `!~` `y`	True if the string `x` does not match the regexp denoted by `y`
`subscript` `in` `array`	True if the array `array` has an element with the subscript `subscript`

Situation	Return value from `system()`
`--traditional`	C `system()`’s value divided by 256
`--posix`	C `system()`’s value
Normal exit of command	Command’s exit status
Death by signal of command	256 + number of murderous signal
Death by signal of command with core dump	512 + number of murderous signal
Some kind of error	−1

Representation	Minimum value	Maximum value
32-bit signed integer	−2,147,483,648	2,147,483,647
32-bit unsigned integer	0	4,294,967,295
64-bit signed integer	−9,223,372,036,854,775,808	9,223,372,036,854,775,807
64-bit unsigned integer	0	18,446,744,073,709,551,615

Representation	Minimum positive nonzero value	Minimum finite value	Maximum finite value
Single-precision floating-point	1.175494*10⁻³⁸	-3.402823*10³⁸	3.402823*10³⁸
Double-precision floating-point	2.225074*10⁻³⁰⁸	-1.797693*10³⁰⁸	1.797693*10³⁰⁸
Quadruple-precision floating-point	3.362103*10⁻⁴⁹³²	-1.189731*10⁴⁹³²	1.189731*10⁴⁹³²

Name	Total bits	Precision	Minimum exponent	Maximum exponent
Single	32	24	−126	+127
Double	64	53	−1022	+1023
Quadruple	128	113	−16382	+16383

`PREC`	IEEE 754 binary format
`"half"`	16-bit half-precision
`"single"`	Basic 32-bit single precision
`"double"`	Basic 64-bit double precision
`"quad"`	Basic 128-bit quadruple precision
`"oct"`	256-bit octuple precision

Rounding mode	IEEE name	`ROUNDMODE`
Round to nearest, ties to even	`roundTiesToEven`	`"N"` or `"n"`
Round toward positive infinity	`roundTowardPositive`	`"U"` or `"u"`
Round toward negative infinity	`roundTowardNegative`	`"D"` or `"d"`
Round toward zero	`roundTowardZero`	`"Z"` or `"z"`
Round away from zero		`"A"` or `"a"`

C entity	Header file
`EOF`	`<stdio.h>`
Values for `errno`	`<errno.h>`
`FILE`	`<stdio.h>`
`NULL`	`<stddef.h>`
`memcpy()`	`<string.h>`
`memset()`	`<string.h>`
`size_t`	`<sys/types.h>`
`struct stat`	`<sys/stat.h>`

	Index	Value
	`3`	`30`
	`1`	`"foo"`
	`0`	`8`
	`2`	`""`

	Index	Value
	`10`	`"number ten"`
	`3`	`30`
	`1`	`"foo"`
	`0`	`8`
	`2`	`""`

	Index	Value
	`"dog"`	`"chien"`
	`"cat"`	`"chat"`
	`"one"`	`"un"`
	`1`	`"un"`

		String	Strnum	Number	Regex	Bool	Array	Undefined
	String	String	String	String	String	String	false	false
	Strnum	false	Strnum	Strnum	false	false	false	false
	Number	Number	Number	Number	false	Number	false	false
Type	Regex	false	false	false	Regex	false	false	false
Requested	Bool	false	false	false	false	Bool	false	false
	Array	false	false	false	false	false	Array	false
	Scalar	Scalar	Scalar	Scalar	Scalar	Scalar	false	false
	Undefined	String	Strnum	Number	Regex	Bool	Array	Undefined
	Value cookie	false	false	false	false	false	false	false

API Version	C Preprocessor Define	enum constant
Major	`gawk_api_major_version`	`GAWK_API_MAJOR_VERSION`
Minor	`gawk_api_minor_version`	`GAWK_API_MINOR_VERSION`

Subscript	Field in `struct stat`	File type
`"name"`	The file name	All
`"dev"`	`st_dev`	All
`"ino"`	`st_ino`	All
`"mode"`	`st_mode`	All
`"nlink"`	`st_nlink`	All
`"uid"`	`st_uid`	All
`"gid"`	`st_gid`	All
`"size"`	`st_size`	All
`"atime"`	`st_atime`	All
`"mtime"`	`st_mtime`	All
`"ctime"`	`st_ctime`	All
`"rdev"`	`st_rdev`	Device files
`"major"`	`st_major`	Device files
`"minor"`	`st_minor`	Device files
`"blksize"`	`st_blksize`	All
`"pmode"`	A human-readable version of the mode value, like that printed by `ls` (for example, `"-rwxr-xr-x"`)	All
`"linkval"`	The value of the symbolic link	Symbolic links
`"type"`	The type of the file as a string—one of `"file"`, `"blockdev"`, `"chardev"`, `"directory"`, `"socket"`, `"fifo"`, `"symlink"`, `"door"`, or `"unknown"` (not all systems support all file types)	All

Array element	Corresponding flag defined by `fnmatch()`
`FNM["CASEFOLD"]`	`FNM_CASEFOLD`
`FNM["FILE_NAME"]`	`FNM_FILE_NAME`
`FNM["LEADING_DIR"]`	`FNM_LEADING_DIR`
`FNM["NOESCAPE"]`	`FNM_NOESCAPE`
`FNM["PATHNAME"]`	`FNM_PATHNAME`
`FNM["PERIOD"]`	`FNM_PERIOD`

Letter	File type
`b`	Block device
`c`	Character device
`d`	Directory
`f`	Regular file
`l`	Symbolic link
`p`	Named pipe (FIFO)
`s`	Socket
`u`	Anything else (unknown)

Feature	BWK `awk`	`mawk`	`gawk`	Now standard
`**` and `**=` operators	X		X
‘`\x`’ escape sequence	X	X	X
`/dev/stdin` special file	X	X	X
`/dev/stdout` special file	X	X	X
`/dev/stderr` special file	X	X	X
`BINMODE` variable		X	X
`FS` as null string	X	X	X
`delete` without subscript	X	X	X	X
`fflush()` function	X	X	X	X
`func` keyword	X		X
`length()` of an array	X	X	X
`nextfile` statement	X	X	X	X
`RS` as regexp	X	X	X
Time-related functions		X	X

Unix and POSIX systems	Arnold Robbins, “arnold at skeeve dot com”
MS-Windows with MinGW	Eli Zaretskii, “eliz at gnu dot org”
OpenVMS	John Malmberg, “wb8tyw at qsl dot net”
z/OS (OS/390)	Daniel Richard G., “skunk at iSKUNK dot ORG”

Item	Limit
Characters in a character class	2^(number of bits per byte)
Length of input record in bytes	`ULONG_MAX`
Length of output record	Unlimited
Length of source line	Unlimited
Number of fields in a record	`ULONG_MAX`
Number of file redirections	Unlimited
Number of input records in one file	`LONG_MAX`
Number of input records total	`LONG_MAX`
Number of pipe redirections	min(number of processes per user, number of open files)
Numeric values	Double-precision floating point (if not using MPFR)
Size of a field in bytes	`ULONG_MAX`
Size of a literal string in bytes	`ULONG_MAX`
Size of a printf string in bytes	`ULONG_MAX`

General Introduction

Table of Contents

Short Table of Contents

Foreword to the Third Edition

Foreword to the Fourth Edition

Preface

History of awk and gawk

A Rose by Any Other Name

Using This Book

Typographical Conventions

Dark Corners

The GNU Project and This Book

How to Contribute

Acknowledgments

Part I: The awk Language

1 Getting Started with awk

1.1 How to Run awk Programs

1.1.1 One-Shot Throwaway awk Programs

1.1.2 Running awk Without Input Files

1.1.3 Running Long Programs

1.1.4 Executable awk Programs

1.1.5 Comments in awk Programs

1.1.6 Shell Quoting Issues

1.1.6.1 Quoting in MS-Windows Batch Files

1.2 Data files for the Examples

1.3 Some Simple Examples

1.4 An Example with Two Rules

1.5 A More Complex Example

1.6 awk Statements Versus Lines

1.7 Other Features of awk

1.8 When to Use awk

1.9 Summary

2 Running awk and gawk

2.1 Invoking awk

2.2 Command-Line Options

2.3 Other Command-Line Arguments

2.4 Naming Standard Input

2.5 The Environment Variables gawk Uses

2.5.1 The AWKPATH Environment Variable

2.5.2 The AWKLIBPATH Environment Variable

2.5.3 Other Environment Variables

2.6 gawk’s Exit Status

2.7 Including Other Files into Your Program

2.8 Loading Dynamic Extensions into Your Program

2.9 Obsolete Options and/or Features

2.10 Undocumented Options and Features

2.11 Summary

3 Regular Expressions

3.1 How to Use Regular Expressions

3.2 Escape Sequences

3.3 Regular Expression Operators

3.3.1 Regexp Operators in awk

3.3.2 Some Notes On Interval Expressions

3.4 Using Bracket Expressions

3.5 How Much Text Matches?

3.6 Using Dynamic Regexps

3.7 gawk-Specific Regexp Operators

3.8 Case Sensitivity in Matching

3.9 Summary

4 Reading Input Files

4.1 How Input Is Split into Records

4.1.1 Record Splitting with Standard awk

4.1.2 Record Splitting with gawk

4.2 Examining Fields

4.3 Nonconstant Field Numbers

4.4 Changing the Contents of a Field

4.5 Specifying How Fields Are Separated

4.5.1 Whitespace Normally Separates Fields

4.5.2 Using Regular Expressions to Separate Fields

4.5.3 Making Each Character a Separate Field

4.5.4 Setting FS from the Command Line

4.5.5 Making the Full Line Be a Single Field

4.5.6 Field-Splitting Summary

4.6 Reading Fixed-Width Data

4.6.1 Processing Fixed-Width Data

4.6.2 Skipping Intervening Fields

4.6.3 Capturing Optional Trailing Data

4.6.4 Field Values With Fixed-Width Data

4.7 Defining Fields by Content

4.7.1 More on CSV Files

History of `awk` and `gawk`

Part I: The `awk` Language

1 Getting Started with `awk`

1.1 How to Run `awk` Programs

1.1.1 One-Shot Throwaway `awk` Programs

1.1.2 Running `awk` Without Input Files

1.1.4 Executable `awk` Programs

1.1.5 Comments in `awk` Programs

1.6 `awk` Statements Versus Lines

1.7 Other Features of `awk`

1.8 When to Use `awk`

2 Running `awk` and `gawk`

2.1 Invoking `awk`

2.5 The Environment Variables `gawk` Uses

2.5.1 The `AWKPATH` Environment Variable

2.5.2 The `AWKLIBPATH` Environment Variable

2.6 `gawk`’s Exit Status

3.3.1 Regexp Operators in `awk`

3.7 `gawk`-Specific Regexp Operators

4.1.1 Record Splitting with Standard `awk`

4.1.2 Record Splitting with `gawk`

4.5.4 Setting `FS` from the Command Line

4.7.2 `FS` Versus `FPAT`: A Subtle Difference

4.8 Checking How `gawk` Is Splitting Records

4.10 Explicit Input with `getline`

4.10.1 Using `getline` with No Arguments

4.10.2 Using `getline` into a Variable

4.10.3 Using `getline` from a File

4.10.4 Using `getline` into a Variable from a File

4.10.5 Using `getline` from a Pipe

4.10.6 Using `getline` into a Variable from a Pipe

4.10.7 Using `getline` from a Coprocess

4.10.8 Using `getline` into a Variable from a Coprocess

4.10.9 Points to Remember About `getline`

4.10.10 Summary of `getline` Variants

5.1 The `print` Statement

5.2 `print` Statement Examples

5.4 Controlling Numeric Output with `print`

5.5 Using `printf` Statements for Fancier Printing

5.5.1 Introduction to the `printf` Statement

5.5.3 Modifiers for `printf` Formats

5.5.4 Examples Using `printf`

5.6 Redirecting Output of `print` and `printf`

5.8 Special File names in `gawk`

5.8.1 Accessing Other Open Files with `gawk`

5.9.1 Using `close()`’s Return Value

6.1.4.1 How `awk` Converts Between Strings and Numbers

6.3.1 True and False in `awk`