#!/usr/bin/env python


import sys, os
import IMP.isd
from IMP.isd.utils import Load, read_sequence_file
#from Isd.io.nomenclature import IUPAC_CONVENTION
IUPAC_CONVENTION='iupac'
TYPE_AMINO_ACID = 'AMINO_ACID'
pseudoatoms_dict = IMP.isd.get_data_path('CHARMM_pseudoatoms.dict')

def del_comment(x):

    n = x.find('!')

    if n >= 0:
        x = x[:n]

    return x

def check_assigns(x):
    return 'resid' in x and 'name' in x

class TBLReader:

    atom_dict = {'segid': '',
                 'resid': -1,
                 'name': ''}

    pseudoatom_char = '*', '%', '#'

    def __init__(self, sequence, ignore_warnings=False, sequence_match=(1,1)):

        self.sequence = sequence
        #sequence_match = (a,b) a: NOE numbering, b: sequence numbering
        self.offset = sequence_match[1]-sequence_match[0]
        self.ignore = ignore_warnings
        self.pseudo_dict = Load(pseudoatoms_dict)

    def extract_contributions(self, contribs):

        new_contribs = []

        for c in contribs:

            if not c:
                continue

            c = c[c.find('('):]
            c = c[:c.rfind(')')+1]

            new_contribs.append(c)

        return new_contribs

    def split_contribution(self, contrib):

        words = contrib.split('(')
        atoms = [word.split(')')[0] for word in words if word]

        return atoms

    def resolve_pseudoatom(self, residue_type, atom_name):

        if '*' in atom_name:
            char = '*'
        elif '#' in atom_name:
            char = '#'

        atom_name = atom_name.replace(char, '%')

        ## TODO: Assumes that pseudo-atom names are compatible with
        ##       IUPAC name, since pseudo_dict can handle IUPAC
        ##       names only.

        try:
            group = self.pseudo_dict[residue_type][atom_name]

        except:

            key = atom_name, residue_type

            if not key in self.missing_atoms:

                msg = 'Could not resolve pseudoatom %s.%s.' % (residue_type, atom_name)

                if self.ignore:
                    print msg
                else:
                    raise KeyError, msg

                self.missing_atoms.append(key)

            return atom_name

        return group

    def to_iupac(self, residue_type, atom_name):

        raise NotImplementedError

        iupac_name = self.thesaurus.convert_atom(residue_type,
                                                 atom_name,
                                                 self.naming_system,
                                                 IUPAC_CONVENTION,
                                                 TYPE_AMINO_ACID)
        try:
            iupac_name = self.thesaurus.convert_atom(residue_type,
                                                     atom_name,
                                                     self.naming_system,
                                                     IUPAC_CONVENTION,
                                                     TYPE_AMINO_ACID)

        except:

            key = atom_name, residue_type

            if not key in self.missing_atoms:

                if '*' in atom_name or '#' in atom_name:

                    msg = 'Pseudoatoms not upported: %s' % atom_name

                    if self.ignore:
                        print msg

                    else:
                        raise KeyError, msg

                elif self.ignore:
                    msg = 'Warning: atom %s not found in residue %s.' % key
                    print msg
                else:
                    raise KeyError, msg % key

                self.missing_atoms.append(key)

            return atom_name

        return iupac_name

    def resolve_dihedral_name(self, atoms):

        raise NotImplementedError

        names = [a['name'] for a in atoms]

        try:
            res_type = self.sequence[atoms[1]['resid']]

        except IndexError:
            print 'Residue number overflow in atoms', atoms
            return ''

        for dihedral in self.connectivity[res_type].dihedrals.values():

            keys = [k for k in dihedral.keys() if 'atom' in k]
            keys.sort()

            atom_names = []

            for k in keys:
                name = dihedral[k]
                if name[-1] in ('-', '+'):
                    name = name[:-1]

                atom_names.append(name)

            if atom_names == names:
                return dihedral['name']

        msg = 'Could not determine name of dihedral angles defined by atoms %s.' % str(names)

        if self.ignore:
            print msg
            return ''

        raise KeyError, msg

    def extract_atom(self, a):

        atom = dict(self.atom_dict)

        words = a.split()

        skip_next = False

        ## correct for segid statements

        words = [x for x in words if x <> '"']

        for i in range(len(words)):

            if skip_next:
                skip_next = False
                continue

            word = words[i]

            if word == 'and':
                continue

            for key in atom.keys():

                if key in word:

                    if key == 'segid':
                        atom[key] = words[i+1][:-1]
                    else:
                        atom[key] = words[i+1]

                    skip_next = True
                    break

            else:
                raise KeyError, 'Value or keyword "%s" unknown. Source: "%s", decomposed into "%s"' % \
                      (word, str(a), str(words))

        atom['resid'] = int(atom['resid']) + self.offset
        atom['name'] = atom['name'].upper()

        return atom

    def build_contributions(self, atoms):

        groups = []

        for a in atoms:

            try:
                res_type = self.sequence[a['resid']]

            except IndexError:
                print 'Residue number overflow in atoms', atoms
                return []

            atom_name = a['name']

            if atom_name[-1] in self.pseudoatom_char:
                group = self.resolve_pseudoatom(res_type, atom_name)

            else:
                #group = [self.to_iupac(res_type, atom_name)]
                group = [atom_name]

            groups.append(group)

        group1, group2 = groups

        contribs = []

        res_1 = atoms[0]['resid']
        res_2 = atoms[1]['resid']

        for i in range(len(group1)):

            name_1 = group1[i]

            for j in range(len(group2)):

                if (res_1, name_1) <> (res_2, group2[j]):
                    contribs.append(((res_1, name_1), (res_2, group2[j])))

        return contribs

    def extract_target_values(self, line):

        end = line.rfind(')')

        values = line[end+1:].split()

        try:
            distances = [float(x) for x in values[:3]]
        except:
            distances = None

        ## read volume from ARIA 1.x restraint files

        val = line.split('volume=')

        if len(val) > 1:
            volume = float(val[1].split()[0].split(',')[0])
        else:

            volume = None

        return distances, volume

    def read_contents(self, filename):

        keywords = 'class',

        filename = os.path.expanduser(filename)

        f = open(filename)
        lines = f.readlines()
        f.close()

        all = ''

        for x in lines:

            x = x.strip()

            if not x or x[0] == '!':
                continue

            not_valid = [kw for kw in keywords if kw in x]

            if not_valid:
                continue

            all += x.lower() + ' '

        return [x.strip() for x in all.split('assi')]

    def find_contributions(self, line):

        contribs = [del_comment(x).strip() for x in line.split('or')]

        ## use alternative parser for implicitly listed atom pairs

        if 1 in [x.count('resid') for x in contribs]:

            atoms = []

            while line:

                start = line.find('(')

                if start < 0:
                    atoms[-1][-1] += line
                    break

                stop = line.find(')')

                selection = [x.strip() for x in line[start:stop+1].split('or')]

                for i in range(len(selection)):

                    val = selection[i]

                    if not '(' in val:
                        val = '(' + val

                    if not ')' in val:
                        val += ')'

                    selection[i] = val

                atoms.append(selection)

                line = line[stop+1:]

            if len(atoms) <> 2:
                raise

            ## find and isolate target distances

            l = []

            for i in range(len(atoms)):

                g = []

                for atom in atoms[i]:

                    n = atom.rfind(')')

                    if n >= 0 and len(atom[n+1:].strip()) > 3:
                        distances = atom[n+1:].strip()
                        atom = atom[:n+1]

                    g.append(atom)

                l.append(g)

            a, b = l

            if len(a) > len(b):
                a, b = b, a

            contribs = []

            for i in a:
                for j in b:
                    contribs.append('%s %s' % (i,j))

            contribs[0] += ' ' + distances

        return contribs

    def create_distance_restraint(self, distances, volume, contributions):
        if distances is None and volume is None:
            raise ValueError, "could not find either volume or "\
                        "distance: %s %s %s" % (distances,volume,contributions)
        if distances is None:
            distances = [volume**(-1./6),0,0]
        dist = distances[0]
        if volume is None:
            volume = dist ** (-6)
        lower = dist - distances[1]
        upper = dist + distances[2]
        return (tuple(contributions), dist, lower, upper, volume)

    def read_distances(self, filename, key, naming_system=IUPAC_CONVENTION,
                       decompose=False):
        """reads a tbl file and parses distance restraints.
        """

        self.naming_system = naming_system

        assigns = self.read_contents(filename)

        restraints = []
        self.missing_atoms = []
        seq_number = 0

        for line in assigns:

            contribs = self.find_contributions(line)

            if False in [check_assigns(x) for x in contribs]:
                continue

            distances, volume = self.extract_target_values(contribs[0])

            if (distances is None and volume is None):
                distances, volume = self.extract_target_values(contribs[-1])

            new_contribs = self.extract_contributions(contribs)

            contributions = []

            for contrib in new_contribs:

                atoms = self.split_contribution(contrib)
                atoms = [self.extract_atom(x) for x in atoms]

                contributions += self.build_contributions(atoms)

            if contributions:
                r = self.create_distance_restraint(distances, volume,
                    contributions)

                restraints.append(r)
                seq_number += 1

        if restraints:

            if decompose:

                d = decompose_restraints(restraints)

                for _type in d.keys():
                    if not d[_type]:
                        del d[_type]

                if len(d) > 1:
                    for _type, val in d.items():

                        if val:
                            new_key =  key + '_%s' % _type
                            d[new_key] = val

                        del d[_type]

                else:
                    d = {key: d.values()[0]}
            else:
                d = {key: restraints}

            return d

    def read_dihedrals(self, filename, key, naming_system=IUPAC_CONVENTION):

        self.naming_system = naming_system

        assigns = self.read_contents(filename)

        restraints = []
        self.missing_atoms = []
        seq_number = 0

        for line in assigns:

            contribs = [del_comment(x).strip() for x in line.split('or')]

            values, volume = self.extract_target_values(contribs[0])
            new_contribs = self.extract_contributions(contribs)

            if not new_contribs:
                continue

            if len(new_contribs) > 1:
                raise ValueError, 'Inconsistency in data file, multiple contributions detected.'

            atoms = self.split_contribution(new_contribs[0])
            atoms = [self.extract_atom(x) for x in atoms]

            name = self.resolve_dihedral_name(atoms)

            r = create_dihedral_restraint(seq_number, name, values, atoms)

            restraints.append(r)
            seq_number += 1

        if restraints:
            return restraints

    def read_rdcs(self, filename, key, naming_system=IUPAC_CONVENTION):

        self.naming_system = naming_system

        assigns = self.read_contents(filename)

        restraints = []
        self.missing_atoms = []
        seq_number = 0

        fake_atom_names = ('OO', 'X', 'Y', 'Z')

        for line in assigns:

            contribs = [del_comment(x).strip() for x in line.split('or')]
            distances, volume = self.extract_target_values(contribs[0])
            new_contribs = self.extract_contributions(contribs)

            contributions = []

            for contrib in new_contribs:

                atoms = self.split_contribution(contrib)
                atoms = [self.extract_atom(x) for x in atoms]

                atoms = [a for a in atoms if not a['name'] in fake_atom_names]

                contributions += self.build_contributions(atoms)

            if contributions:
                r = create_rdc_restraint(seq_number, distances[0], contributions)

                restraints.append(r)
                seq_number += 1

        if restraints:
            return restraints

if __name__ == '__main__':

    noe = 'noe.tbl'
    sequence = read_sequence_file('seq.dat', first_residue_number=1)
    reader = TBLReader(sequence, ignore_warnings=True)
    reader.read_distances(noe, key='test')