# Input Module
# ============
# Module variable:
#
#   $inputLine -- the unprocessed part of the latest line
#                 of input to have been read
#
# Exported functions:
#
#   &INInit     -- module initialization function
#   &INGetToken -- returns one token read from the input.

# INInit initializes variables owned by the Input module.
#
# Takes no arguments.  It resets the module variable $inputLine to the
# empty string, discarding any unprocessed input text left over from an
# earlier line.  The value of that assignment (the empty string) is also
# the sub's implicit return value.
sub INInit {
    $inputLine = '';
}

# NOTE(review): the text of INGetToken's header comment and the visible
# part of its body follow on one line, exactly as received.  The
# definition is cut off in this view, so it is left untouched rather than
# reconstructed -- restore its line breaks together with the missing
# remainder.
# INGetToken scans text read from the STDIN input stream and # returns one token. # # A token is implemented by a (tokentype, tokentext) pair, where: # # 1. The possible tokentypes are: 'EOF', 'TAG', 'SPACE' or 'TEXT', # which indicate an end-of-file, a HTML tag, a block of # white-space characters or a block of other text, respectively. # # 2. The tokentext component is the sequence of characters that # comprise the token. These characters either form a complete # HTML tag (including the opening and closing angle brackets), # or a group of consecutive white-space characters, # or group of non-white-space and non-tag characters. # (The tokentext component is undefined if the tokentype # is EOF.) sub INGetToken { local($token, $tokenType); if (length($inputLine) == 0) { $inputLine = ; } if ($inputLine) { if ($inputLine =~ /^\s+/) { # leading white space is extracted as the token $token = $&; $inputLine = $'; $tokenType = 'SPACE'; } elsif ($inputLine =~ /^<[^>]*>/) { # an HTML tag is extracted as the token $token = $&; $inputLine = $'; $tokenType = 'TAG'; } elsif ($inputLine =~ /^