"""Use the Python pygments library to perform extra checks on C++ grammar."""

from pygments import token
from pygments.lexers.compiled import CppLexer
import re
import os

def check_header_file(fh_name, errors):
    """Check a single C++ header file"""
    _check_file(fh_name, True, errors)

def check_cpp_file(fh_name, errors):
    """Check a single C++ source file"""
    _check_file(fh_name, False, errors)


def _check_file(fh_name, header, errors):
    fh, filename = fh_name
    s = tokenize_file(fh)
    check_tokens(s, filename, header, errors)

def tokenize_file(fh):
    """Use the Python pygments library to tokenize a C++ file"""
    code = fh.read()
    c = CppLexer()
    scan = []
    for (index, tok, value) in c.get_tokens_unprocessed(code):
        scan.append((tok, value))
    return scan

def check_tokens(scan, filename, header, errors):
    check_comment_header(scan, filename, errors)
    check_eol(scan, filename, errors)
    if header:
        check_header_start_end(scan, filename, errors)

def check_comment_header(scan, filename, errors):
    if len(scan) < 1 or scan[0][0] not in (token.Comment,
                                           token.Comment.Multiline):
        errors.append('%s:1: First line should be a comment ' % filename + \
                      'with a copyright notice and a description of the file')

def check_eol(scan, filename, errors):
    if len(scan) > 0 and ((scan[-1][0] != token.Comment.Preproc \
                           and scan[-1][0] != token.Text) \
                          or scan[-1][1] != '\n'):
        errors.append('%s:999: No end-of-line character at the ' % filename + \
                      'end of the last line in the file')
        # Add an EOL so other checks don't complain
        scan.append((token.Text, '\n'))

def have_header_guard(scan):
    return len(scan) >= 10 \
           and scan[3][0] == token.Comment.Preproc \
           and scan[3][1].startswith('ifndef') \
           and scan[6][0] == token.Comment.Preproc \
           and scan[6][1].startswith('define') \
           and scan[-3][0] == token.Comment.Preproc \
           and scan[-3][1].startswith('endif') \
           and scan[-2][0] in (token.Comment, token.Comment.Multiline)

def header_guard_ok(scan, guard_prefix, guard_suffix):
    """Make sure the guard has the correct prefix and suffix, and is consistent
       between the #ifndef, #define and #endif lines"""
    guard = scan[3][1][7:]
    return guard.startswith(guard_prefix) and guard.endswith(guard_suffix) \
           and scan[6][1] == 'define ' + guard \
           and scan[-2][1] == '/* %s */' % guard

def get_header_guard(filename):
    """Get prefix and suffix for header guard"""
    guard_prefix = "IMP"
    module = 'IMP'
    m = re.search('modules\/(\w+)\/', filename)
    if m:
        module = m.group(1)
        guard_prefix += module.upper()
    base = os.path.basename(filename)
    # For convenience remove leading module name qualifier if present
    for prefix in (module, '_'):
        if base.startswith(prefix):
            base = base[len(prefix):]
    def repl(match):
        return match.group(1).upper() + '_' + match.group(2)
    # Convert CamelCase into CAPS_SEPARATED_BY_UNDERSCORES
    guard_suffix = re.subn('([a-z]+|[A-Z]{2,})([A-Z0-9])', repl,
                           base)[0].upper()[:-2] + '_H'
    return guard_prefix, guard_suffix

def check_header_start_end(scan, filename, errors):
    guard_prefix, guard_suffix = get_header_guard(filename)
    if not have_header_guard(scan) \
       or not header_guard_ok(scan, guard_prefix, guard_suffix):
        header_guard = guard_prefix + '_' + guard_suffix
        errors.append('%s:%d: Missing or incomplete header guard.' \
                      % (filename, 1) + """
Header files should start with a comment, then a blank line, then the rest
of the file wrapped with a header guard. This must start with %s
and end with %s - in between can be placed extra qualifiers, e.g. for a
namespace. For example,

/** Copyright and file description */

#ifndef %s
#define %s
...
#endif  /* %s */
""" % (guard_prefix, guard_suffix, header_guard, header_guard, header_guard))