//
// This file is part of ProSMART.
//

#include "prosmartClass_PDBfile.h"

// Global switch. It is not read by any code visible in this file; presumably it
// enables DNA/RNA handling elsewhere in ProSMART -- TODO confirm against its users.
bool USE_DNARNA = 0;
// When non-zero, PDBfile::output_resid() omits the ">chain_X" header line and the
// 10-residue spacing / line wrapping, so only the raw sequence letters are written.
bool ONLY_OUTPUT_SEQUENCE_FILES = 0;

// Streams one atom record as a single space-separated line terminated by endl.
// Fields, in order: atom, alt, resid, chain, res_num, ins_code, x, y, z, occup,
// bfact, element, charge, asa, sigma.
// The record is written to `out` (the stream supplied by the caller), which is
// also the stream returned; previously the text always went to cout regardless
// of the stream the caller asked for.
ostream& operator<<(ostream& out, pdbline pdb)
{
    out << pdb.atom << " "
        << pdb.alt << " "
        << pdb.resid << " "
        << pdb.chain << " "
        << pdb.res_num << " "
        << pdb.ins_code << " "
        << pdb.crd.x << " "
        << pdb.crd.y << " "
        << pdb.crd.z << " "
        << pdb.occup << " "
        << pdb.bfact << " "
        << pdb.element << " "
        << pdb.charge << " "
        << pdb.asa << " "
        << pdb.sigma
        << endl;
    return out;
}

// Streams every atom record of `pdb`, one per line, each prefixed with its
// 1-based position. A blank line is emitted before and after the listing.
// Output goes to `out` (previously it was sent to cout whatever stream the
// caller passed in).
ostream& operator<<(ostream& out, PDBfile& pdb)
{
    out << endl;
    const int total = pdb.size();
    for(int i=0; i<total; i++){
        out << i+1 << " " << pdb.line(i);
    }
    out << endl;
    return out;
}

// Streams a residue correspondence entry as its residue number immediately
// followed by its insertion code (no separator, no newline).
// Output goes to `out` (previously it was sent to cout whatever stream the
// caller passed in).
ostream& operator<<(ostream& out, res_corresp& res)
{
    out << res.res << res.ins;
    return out;
}

// Streams a list of residue correspondence entries, one per line, each prefixed
// with its 0-based index. A blank line is emitted before and after the listing.
// Output goes to `out` (previously it was sent to cout whatever stream the
// caller passed in).
ostream& operator<<(ostream& out, vector<res_corresp>& v)
{
    out << endl;
    for(unsigned int i=0; i<v.size(); i++){
        out << i << " " << v[i] << endl;
    }
    out << endl;
    return out;
}

void PDBfile::write_formatted_pdb(string &file)
//writes formatted pdb info, ready for ProSMART ALIGN to read.
{
	ofstream outfile;

    /*string file_js = file+".js";
	outfile.open(file_js.c_str());
	if(outfile.is_open()){
        outfile << "var dat1 = new Array();" << endl;
		for(unsigned int i=0; i<pdbfile.size(); i++){
            outfile << "dat1[" << i << "] = new Array("
            << "'" << pdbfile[i].atom << "'," 
			<< "'" << pdbfile[i].alt << "',"
			<< "'" << pdbfile[i].resid << "',"
			<< "'" << pdbfile[i].chain << "',"
			<< "'" << pdbfile[i].res_num << "',"
			<< "'" << pdbfile[i].ins_code << "',"
			<< "'" << pdbfile[i].crd.x << "',"
			<< "'" << pdbfile[i].crd.y << "',"
			<< "'" << pdbfile[i].crd.z << "',"
			<< "'" << pdbfile[i].occup << "',"
			<< "'" << pdbfile[i].bfact << "',"
			<< "'" << pdbfile[i].element << "',"
			<< "'" << pdbfile[i].charge << "',"
            << "'" << pdbfile[i].orig_resnum << "',"
            << "'" << pdbfile[i].asa << "',"
            << "'" << pdbfile[i].sigma << "');\n";
		}
		outfile.close();
	} else {
		cout << endl << endl << "Error - cannot open " << file << " for writing." << endl << endl;
		exit(-1);
	}*/
    
	outfile.open(file.c_str());
	if(outfile.is_open()){
		for(unsigned int i=0; i<pdbfile.size(); i++){
            outfile << pdbfile[i].atom << "\t" 
			<< pdbfile[i].alt << "\t"
			<< pdbfile[i].resid << "\t"
			<< pdbfile[i].chain << "\t"
			<< pdbfile[i].res_num << "\t"
			<< pdbfile[i].ins_code << "\t"
			<< pdbfile[i].crd.x << "\t"
			<< pdbfile[i].crd.y << "\t"
			<< pdbfile[i].crd.z << "\t"
			<< pdbfile[i].occup << "\t"
			<< pdbfile[i].bfact << "\t"
			<< pdbfile[i].element << "\t"
			<< pdbfile[i].charge << "\t"
            << pdbfile[i].orig_resnum << "\t"
            << pdbfile[i].asa << "\t"
            << pdbfile[i].sigma << "\n";
		}
		outfile.close();
	} else {
		cout << endl << endl << "Error - cannot open " << file << " for writing." << endl << endl;
		exit(-1);
	}
	return;
}

void PDBfile::read_formatted_pdb(string &file)
{
	pdbline temp;
	ifstream infile(file.c_str());
	
	if(!infile){
		cout << endl << endl << "Error - cannot open " << file << " for reading." << endl << endl;
		exit(-1);
	}
	
    temp.asa = 0.0;
	pdbfile.clear();
	string tmp;
	while(!infile.eof()) {
		
		getline(infile,tmp,'\t');
		if(infile.eof()){break;}
		for(int i=0; i<5; i++){
			temp.atom[i] = tmp[i];
		}
		
		getline(infile,tmp,'\t');
		temp.alt = tmp[0];
		
		getline(infile,tmp,'\t');
		for(int i=0; i<4; i++){
			temp.resid[i] = tmp[i];
		}
		
		getline(infile,tmp,'\t');
		temp.chain = tmp[0];
		
		getline(infile,tmp,'\t');
		temp.res_num = str_to_int(tmp);
		
		getline(infile,tmp,'\t');
		temp.ins_code = tmp[0];
		
		getline(infile,tmp,'\t');
		temp.crd.x = str_to_double(tmp);
		getline(infile,tmp,'\t');
		temp.crd.y = str_to_double(tmp);
		getline(infile,tmp,'\t');
		temp.crd.z = str_to_double(tmp);
		
		getline(infile,tmp,'\t');
		temp.occup = str_to_double(tmp);
		
		getline(infile,tmp,'\t');
		temp.bfact = str_to_double(tmp);
        
        getline(infile,tmp,'\t');
		for(int i=0; i<3; i++){
			temp.element[i] = tmp[i];
		}
        
        getline(infile,tmp,'\t');
		for(int i=0; i<3; i++){
			temp.charge[i] = tmp[i];
		}
				
        getline(infile,temp.orig_resnum,'\t');
        
		getline(infile,tmp,'\t');
        temp.asa = str_to_double(tmp);
        
        getline(infile,tmp,'\n');
        temp.sigma = str_to_double(tmp);
        
		pdbfile.push_back(temp);
		
        //cout << temp;
	}
    
	infile.close();
	
	return;
}

void PDBfile::read_formatted_pdb_mainchain(string &file)
{
	pdbline temp;
	ifstream infile(file.c_str());
	
	if(!infile){
		cout << endl << endl << "Error - cannot open " << file << " for reading." << endl << endl;
		exit(-1);
	}
	
    temp.asa = 0.0;
	pdbfile.clear();
	string tmp;
	while(!infile.eof()) {
		
		getline(infile,tmp,'\t');
		if(infile.eof()){break;}
		for(int i=0; i<5; i++){
			temp.atom[i] = tmp[i];
		}
		
		getline(infile,tmp,'\t');
		temp.alt = tmp[0];
		
		getline(infile,tmp,'\t');
		for(int i=0; i<4; i++){
			temp.resid[i] = tmp[i];
		}
		
		getline(infile,tmp,'\t');
		temp.chain = tmp[0];
		
		getline(infile,tmp,'\t');
		temp.res_num = str_to_int(tmp);
		
		getline(infile,tmp,'\t');
		temp.ins_code = tmp[0];
		
		getline(infile,tmp,'\t');
		temp.crd.x = str_to_double(tmp);
		getline(infile,tmp,'\t');
		temp.crd.y = str_to_double(tmp);
		getline(infile,tmp,'\t');
		temp.crd.z = str_to_double(tmp);
		
		getline(infile,tmp,'\t');
		temp.occup = str_to_double(tmp);
		
		getline(infile,tmp,'\t');
		temp.bfact = str_to_double(tmp);
        
        getline(infile,tmp,'\t');
		for(int i=0; i<3; i++){
			temp.element[i] = tmp[i];
		}
        
        getline(infile,tmp,'\t');
		for(int i=0; i<3; i++){
			temp.charge[i] = tmp[i];
		}
		
        getline(infile,temp.orig_resnum,'\t');

        getline(infile,tmp,'\t');
        temp.asa = str_to_double(tmp);
        
        getline(infile,tmp,'\n');
        temp.sigma = str_to_double(tmp);
        
        
		//only want main chain atoms.
		if(strcmp(temp.atom," N  ")!=0 && strcmp(temp.atom," CA ")!=0 && strcmp(temp.atom," C  ")!=0 && strcmp(temp.atom," O  ")!=0){
            if(strcmp(temp.atom," O5'")!=0 && strcmp(temp.atom," C1'")!=0 && strcmp(temp.atom," C4'")!=0 && strcmp(temp.atom," O3'")!=0){
                continue;
            }
		}
		
		pdbfile.push_back(temp);
		
		/*cout << temp.atom << "|" 
		 << temp.alt << "|"
		 << temp.resid << "|"
		 << temp.chain << "|"
		 << temp.res_num << "|"
		 << temp.ins_code << "|"
		 << temp.crd.x << "|"
		 << temp.crd.y << "|"
		 << temp.crd.z << "|"
		 << temp.occup << "|"
		 << temp.bfact << "|"
		 << temp.orig_resnum	<< "|" << endl;*/
	}
	
	infile.close();
    
	return;
}


// Returns a copy of the coordinates of the atom at index i (no bounds check).
coord PDBfile::crd(int i)
{
    coord position = pdbfile[i].crd;
    return position;
}

// Returns the residue number stored for the atom at index idx (no bounds check).
int PDBfile::get_resnum(int idx)
{
    const int number = pdbfile[idx].res_num;
    return number;
}

void PDBfile::set_resnum(int &idx, int val)
{
    pdbfile[idx].res_num = val;
    return;
}

void PDBfile::swap_atoms(int &i, int &j)
{
    pdbline tmp = pdbfile[i];
    pdbfile[i] = pdbfile[j];
    pdbfile[j] = tmp;
    return;
}

// Returns a pointer to the residue-name buffer held inside the stored record at
// index idx (no bounds check). The pointer refers to internal storage, not a copy.
char* PDBfile::get_resid(int idx)
{
    char *name = pdbfile[idx].resid;
    return name;
}

// Returns the atom name of the record at index idx as a string copy (no bounds check).
string PDBfile::get_atom(int idx)
{
    return string(pdbfile[idx].atom);
}

// Returns the chain identifier of the record at index idx (no bounds check).
char PDBfile::get_chain(int idx)
{
    const char id = pdbfile[idx].chain;
    return id;
}

// Returns the element symbol of the record at index idx as a string copy (no bounds check).
string PDBfile::get_element(int idx)
{
    return string(pdbfile[idx].element);
}

// Returns the B-factor field of the record at index idx (no bounds check).
double PDBfile::get_bfact(int idx)
{
    const double value = pdbfile[idx].bfact;
    return value;
}

// Returns the occupancy field of the record at index idx (no bounds check).
double PDBfile::get_occup(int idx)
{
    const double value = pdbfile[idx].occup;
    return value;
}

// Returns the alternate-location character of the record at index idx (no bounds check).
char PDBfile::get_alt(int idx)
{
    const char flag = pdbfile[idx].alt;
    return flag;
}

// Returns a copy of the orig_resnum string of the record at index idx
// (no bounds check). readPDB fills this with residue number + insertion code.
string PDBfile::get_orig_resnum(int idx)
{
    string label = pdbfile[idx].orig_resnum;
    return label;
}

// Returns the asa field of the record at index idx (no bounds check).
double PDBfile::get_asa(int idx)
{
    const double value = pdbfile[idx].asa;
    return value;
}

// Returns the sigma field of the record at index idx (no bounds check).
double PDBfile::get_sigma(int idx)
{
    const double value = pdbfile[idx].sigma;
    return value;
}

// Convenience overload: reads `filein` restricted to `chain`, passing an empty
// error-file name to the three-argument readPDB. Returns that call's result.
bool PDBfile::readPDB(string filein, char chain)
{
    string no_error_file;
    const bool loaded = readPDB(no_error_file,filein,chain);
    return loaded;
}

// Convenience overload: reads `filein` without restricting the chain. The chain
// argument ' ' is what the three-argument readPDB treats as "no chain specified".
bool PDBfile::readPDB(string &error_file, string filein)
{
    const char any_chain = ' ';
    const bool loaded = readPDB(error_file,filein,any_chain);
    return loaded;
}

bool PDBfile::readPDB(string &error_file, string &filein, char chain)
{ 
    string line;
    pdbline newline;
    newline.asa = 0.0;
    newline.sigma = 0.0;
    string empty;
    stringstream ss;
    coord crdtemp;
    
    ifstream infile(filein.c_str(), ios::in);
    if(infile.is_open()){
		while(!infile.eof()){
            line.clear();
			getline(infile,line);
			
            if(line.size()<66)
                continue;         //minimum length to comply with requirements
            
			if(chain!=' ' && line[21]!=chain)continue;		//only take this chain, unless chain is not specified
			
            if(line.substr(0,6)!="ATOM  "){
                if(line.substr(0,6)!="HETATM"){
					continue;
				}
			}
            if(line.size()>=78){
                if(line[76]=='H'){continue;}	//dont want hydrogen atoms
                if(line[77]=='H'){continue;}
            }
            
            for(unsigned int i=0; i<4;i++){newline.atom[i] = line[12+i];}newline.atom[4]='\0';
            newline.alt = line[16];
            for(unsigned int i=0; i<3;i++){newline.resid[i] = line[17+i];}newline.resid[3]='\0';
            newline.chain = line[21];
            newline.res_num = atoi(line.substr(22,4).c_str());
            newline.ins_code = line[26];
            crdtemp.x = atof(line.substr(30,8).c_str());
            crdtemp.y = atof(line.substr(38,8).c_str());
            crdtemp.z = atof(line.substr(46,8).c_str());
            newline.crd = crdtemp;
            newline.occup = atof(line.substr(54,6).c_str());
            newline.bfact = atof(line.substr(60,6).c_str());
            
            if(line.size()>=78){
                for(unsigned int i=0; i<2;i++){newline.element[i] = line[76+i];}newline.element[2]='\0';
                if(line.size()>=80)
                    for(unsigned int i=0; i<2;i++){newline.charge[i] = line[78+i];}newline.charge[2]='\0';
            }
            empty="";
            ss.str(empty);
            ss << newline.res_num << newline.ins_code;
            newline.orig_resnum=ss.str();
            
            pdbfile.push_back(newline);
        }
        infile.close();
    } else {
        xml_entry(error_file,0,13,filein);
        cout << "Unable to open " << filein << " for reading" << endl;
    }
	
    if(pdbfile.size()==0){
        xml_entry(error_file,0,19,filein);
        cout << endl << "Error - could not read chain " << chain << " in file: " << filein;
		return 0;
    }
	
    return 1;
}

void PDBfile::merge_asa(PDBfile &pdb_asa)
//Copies per-atom ASA values from pdb_asa into the matching atoms of this object.
//assume that ALL atoms/lines in pdb_asa are present in pdbfile, but the converse may not be true.
//assume that .asa is initialised to 0.0 for all atoms/lines.
//Both lists are walked forwards in a single pass: for each atom of pdb_asa the
//search continues from just after the previous match, so the two files must
//list their common atoms in the same order. An atom matches when residue
//number, insertion code and atom name (spaces removed) all agree.
//If an atom of pdb_asa finds no match before the end of pdbfile, merging stops.
//Fix: the atom names of the two records are now actually compared; previously
//the stored atom's name was compared with itself, so the name never mattered.
{
    const int total = (int)pdbfile.size();
    int idx = -1;
    for(int i=0; i<pdb_asa.size(); i++){
        pdbline tmp = pdb_asa.line(i);
        const string wanted = delete_spaces(tmp.atom);

        bool found = false;
        while(++idx < total){
            //find line in pdbfile that matches tmp
            if(pdbfile[idx].res_num != tmp.res_num) continue;
            if(pdbfile[idx].ins_code != tmp.ins_code) continue;
            if(delete_spaces(pdbfile[idx].atom) != wanted) continue;
            //found a match.
            pdbfile[idx].asa = tmp.bfact;   //asa is stored in the bfact field in pdb_asa.
            found = true;
            break;
        }
        if(!found) break;                   //ran off the end of pdbfile
    }
    return;
}

void PDBfile::import_sigma(string &filein)
//Fills the .sigma field of stored atoms from the rows returned by
//read_file_lines(filein).
//assume that ALL atoms/lines in pdbfile are present in refmac sigma file, but the converse may not be true.
//assume that .sigma is initialised to 0.0 for all atoms/lines, and orig_resnum corresponds to resnum in refmac sigma file.
//Only rows with exactly 8 columns whose column 4 starts with the chain of the
//first stored atom are considered. Columns used: [1] atom name, [3] residue
//name, [5] residue number (+ insertion code), [6] sigma value.
//Each row is consumed by at most one atom.
{
    if(pdbfile.size()==0) return;

    vector<vector<string> > all_rows = read_file_lines(filein);

    //assume only interested in one chain - all chains in pdbfile are same
    const char wanted_chain = pdbfile[0].chain;
    vector<vector<string> > data_in;
    for(unsigned int r=0; r<all_rows.size(); r++){
        if(all_rows[r].size()==8 && all_rows[r][4][0]==wanted_chain){
            data_in.push_back(all_rows[r]);
        }
    }

    for(unsigned int i=0; i<pdbfile.size() && data_in.size()!=0; i++){
        string resnum = int_to_str(pdbfile[i].res_num);
        if(pdbfile[i].ins_code!=' '){
            resnum += pdbfile[i].ins_code;
        }
        string atom = delete_spaces(pdbfile[i].atom);

        //find line in data_in that matches
        for(unsigned int j=0; j<data_in.size(); j++){
            const bool same = data_in[j][5] == resnum
                           && data_in[j][3] == pdbfile[i].resid
                           && data_in[j][1] == atom;
            if(!same) continue;
            //found a match.
            pdbfile[i].sigma = str_to_double(data_in[j][6]);
            data_in.erase(data_in.begin()+j);
            break;
        }
    }
}

// Appends a copy of `newline` to the end of the stored atom list.
void PDBfile::add(pdbline &newline)
{
    pdbfile.insert(pdbfile.end(), newline);
}

// Removes the atom record at index i (no bounds check; i is not modified).
void PDBfile::erase(int &i)
{
    const int position = i;
    pdbfile.erase(pdbfile.begin()+position);
}

// Removes the atom record at index i (no bounds check; i is not modified).
void PDBfile::erase(unsigned int &i)
{
    const unsigned int position = i;
    pdbfile.erase(pdbfile.begin()+position);
}

// Returns a copy of the atom record at index idx (no bounds check).
pdbline PDBfile::line(int idx)
{
    pdbline record = pdbfile[idx];
    return record;
}

// Returns the number of stored atom records, converted to int.
int PDBfile::size()
{
    const int count = pdbfile.size();
    return count;
}

// Discards all stored atom records.
void PDBfile::clear()
{
    pdbfile.clear();
}

// Fills `pdb` (cleared first) with the main-chain atoms CA, C, N and O of this
// object, in their original order. For each of the four atom names, an atom is
// skipped when its residue number equals that of the most recently accepted
// atom of the same name, so repeated copies within one residue are dropped.
//
// Afterwards the result is checked in groups of four: every group must be
// complete and share one residue number, otherwise a warning is printed. The
// contents of `pdb` are returned unchanged either way.
//
// Fix: the check used to read up to three entries past the end of `pdb` when
// the atom count was not a multiple of four; an incomplete final group is now
// reported as missing atoms without reading out of range.
void PDBfile::mainchain(PDBfile &pdb)
{
    //residue number of the last accepted atom of each kind
    int ca = -1;
    int c = -1;
    int n = -1;
    int o = -1;

    pdb.clear();

    for(unsigned int ctr=0; ctr<pdbfile.size(); ctr++){
        const int current = pdbfile[ctr].res_num;
        const char *name = pdbfile[ctr].atom;

        if(strcmp(name," CA ")==0 && ca!=current){
            ca = current;
            pdb.add(pdbfile[ctr]);
        } else if(strcmp(name," C  ")==0 && c!=current){
            c = current;
            pdb.add(pdbfile[ctr]);
        } else if(strcmp(name," N  ")==0 && n!=current){
            n = current;
            pdb.add(pdbfile[ctr]);
        } else if(strcmp(name," O  ")==0 && o!=current){
            o = current;
            pdb.add(pdbfile[ctr]);
        }
    }

    const int total = pdb.size();
    bool valid = true;
    for(int first=0; first<total; first+=4){
        if(first+3 >= total){
            valid = false;          //last residue has fewer than four atoms
            break;
        }
        const int resnum = pdb.get_resnum(first);
        if(resnum != pdb.get_resnum(first+1)
        || resnum != pdb.get_resnum(first+2)
        || resnum != pdb.get_resnum(first+3)){
            valid = false;
            break;
        }
    }
    if(!valid){
        cout << endl << endl << "Warning - some mainchain atoms are missing from input protein chain. Continuing anyway." << endl << endl;
    }
    return;
}

/*void PDBfile::domain(PDBfile &pdb, char dom)
 {  
 pdb.clear();
 for(unsigned int ctr=0; ctr<pdbfile.size(); ctr++){
 if(pdbfile[ctr].chain == dom){
 pdb.add(pdbfile[ctr]);
 }
 }
 if(pdb.size()<=0){
 cout << endl << "Error - could not find chain " << dom << " in .pdb file";
 cout << endl << "Program terminated." << endl;
 exit(-1);
 }
 return;
 }*/

// Removes every stored atom whose residue number is n or greater; the order of
// the remaining atoms is unchanged.
void PDBfile::trim(int n)
{
    unsigned int pos = 0;
    while(pos < pdbfile.size()){
        if(pdbfile[pos].res_num < n){
            ++pos;
        } else {
            //the next record slides into `pos`, so do not advance
            pdbfile.erase(pdbfile.begin()+pos);
        }
    }
}

// Keeps only atoms whose residue number lies inside at least one inclusive
// interval [RANGE1[i], RANGE2[i]]; all other atoms are removed. RANGE2 is
// indexed up to RANGE1.size(), so it must be at least as long as RANGE1.
void PDBfile::filter_residues_by_range(vector<int> &RANGE1, vector<int> &RANGE2)
{
    unsigned int pos = 0;
    while(pos < pdbfile.size()){
        const int resnum = pdbfile[pos].res_num;

        bool inside = false;
        for(unsigned int r=0; r<RANGE1.size() && !inside; r++){
            inside = (resnum >= RANGE1[r] && resnum <= RANGE2[r]);
        }

        if(inside){
            ++pos;
        } else {
            //the next record slides into `pos`, so do not advance
            pdbfile.erase(pdbfile.begin()+pos);
        }
    }
}

// Removes every atom whose residue label appears in RM_RES. The label is the
// residue number followed by the insertion code, passed through delete_spaces()
// before it is compared with each entry of RM_RES.
void PDBfile::filter_residues_by_removal(vector<string> &RM_RES)
{
    stringstream label;
    unsigned int pos = 0;
    while(pos < pdbfile.size()){
        label.str("");
        label << pdbfile[pos].res_num << pdbfile[pos].ins_code;

        bool listed = false;
        for(unsigned int r=0; r<RM_RES.size() && !listed; r++){
            listed = (delete_spaces(label.str()) == RM_RES[r]);
        }

        if(listed){
            //the next record slides into `pos`, so do not advance
            pdbfile.erase(pdbfile.begin()+pos);
        } else {
            ++pos;
        }
    }
}

// Replaces the contents of `crds` with the coordinates of every stored atom,
// in storage order.
void PDBfile::getcoords(Coords &crds)
{
    crds.clear();
    const unsigned int total = pdbfile.size();
    unsigned int pos = 0;
    while(pos < total){
        crds.add(pdbfile[pos].crd);
        ++pos;
    }
}

// Replaces the contents of `crds` with the coordinates of the atoms whose
// indices are listed in `atoms`, in the order given (indices are not checked).
void PDBfile::getcoords(Coords &crds, vector<int> atoms)
{
    crds.clear();
    for(vector<int>::const_iterator it=atoms.begin(); it!=atoms.end(); ++it){
        crds.add(pdbfile[*it].crd);
    }
}

// Replaces the contents of `crds` with the coordinates of the atoms whose
// indices are listed in `atoms`, in the order given (indices are not checked).
void PDBfile::getcoords(Coords &crds, vector<unsigned int> atoms)
{
    crds.clear();
    for(vector<unsigned int>::const_iterator it=atoms.begin(); it!=atoms.end(); ++it){
        crds.add(pdbfile[*it].crd);
    }
}

// Keeps a single alternate-location label per run of atoms sharing a residue
// number. Within such a run, the first non-blank alt character seen becomes the
// chosen label; later atoms carrying a different non-blank alt character are
// removed. Atoms with a blank alt character are always kept.
void PDBfile::filter_alt_atoms()
{
    if(pdbfile.size()==0) return;

    int run_resnum = pdbfile[0].res_num;
    char chosen_alt = pdbfile[0].alt;

    unsigned int pos = 1;
    while(pos < pdbfile.size()){
        const char alt = pdbfile[pos].alt;

        if(pdbfile[pos].res_num != run_resnum){
            //first atom of a new run: restart with its residue number and label
            run_resnum = pdbfile[pos].res_num;
            chosen_alt = alt;
            ++pos;
            continue;
        }

        if(alt != ' '){
            if(chosen_alt == ' '){
                chosen_alt = alt;
            } else if(chosen_alt != alt){
                //the next record slides into `pos`, so do not advance
                pdbfile.erase(pdbfile.begin()+pos);
                continue;
            }
        }
        ++pos;
    }
}

// Removes every atom whose occupancy is below x (or exactly 0 when x is 0) and
// prints a warning naming fileID if anything was removed.
// When OUTPUT_LOWOCCUP_ALT_COMPLETE_FILES is defined, the affected residues
// (fileID + residue number + insertion code, consecutive duplicates collapsed)
// are also written to log/<fileID>_occup.txt, and residues having a non-blank
// alt character are listed in log/<fileID>_alt.txt with a second warning.
void PDBfile::remove_low_occup(string &fileID, double x)
{
#ifdef OUTPUT_LOWOCCUP_ALT_COMPLETE_FILES
    vector<string> dropped;
    vector<string> with_alt;

    unsigned int pos = 0;
    while(pos < pdbfile.size()){
        if(pdbfile[pos].alt != ' '){
            string label = fileID + " " + int_to_str(pdbfile[pos].res_num) + pdbfile[pos].ins_code;
            if(with_alt.size()==0 || label!=with_alt.back()){
                with_alt.push_back(label);
            }
        }

        const bool low = pdbfile[pos].occup < x || (pdbfile[pos].occup == 0 && x == 0);
        if(!low){
            ++pos;
            continue;
        }
        string label = fileID + " " + int_to_str(pdbfile[pos].res_num) + pdbfile[pos].ins_code;
        if(dropped.size()==0 || label!=dropped.back()){
            dropped.push_back(label);
        }
        //the next record slides into `pos`, so do not advance
        pdbfile.erase(pdbfile.begin()+pos);
    }

    if(dropped.size()>0){
        ofstream outfile;
        outfile.open(("log/" + fileID + "_occup.txt").c_str());
        if(outfile.is_open()){
            for(unsigned int k=0; k<dropped.size(); k++){
                outfile << dropped[k] << endl;
            }
            outfile.close();
        }
        cout << endl << "Warning - " << dropped.size() << " atoms with low (" << x << ") occupancies have been removed from " << fileID << endl;
    }
    if(with_alt.size()>0){
        ofstream outfile;
        outfile.open(("log/" + fileID + "_alt.txt").c_str());
        if(outfile.is_open()){
            for(unsigned int k=0; k<with_alt.size(); k++){
                outfile << with_alt[k] << endl;
            }
            outfile.close();
        }
        cout << endl << "Warning - there are " << with_alt.size() << " residues with alternative conformations in " << fileID << endl;
    }
#else
    int removed = 0;
    unsigned int pos = 0;
    while(pos < pdbfile.size()){
        const bool low = pdbfile[pos].occup < x || (pdbfile[pos].occup == 0 && x == 0);
        if(low){
            //the next record slides into `pos`, so do not advance
            pdbfile.erase(pdbfile.begin()+pos);
            removed++;
        } else {
            ++pos;
        }
    }
    if(removed>0){
        cout << endl << "Warning - " << removed << " atoms with low (" << x << ") occupancies have been removed from " << fileID << endl;
    }
#endif
    return;
}



// Writes the one-letter residue sequence of the stored atoms to `fileout`.
//
// chain           only used for the ">chain_<chain> " header line.
// fileout         output path; if it cannot be opened nothing is written and no
//                 error is reported.
// RANGE1, RANGE2  when RANGE1 is non-empty, a "# Note" line listing the
//                 [RANGE1[i],RANGE2[i]] pairs is written first.
//
// A new residue is recognised whenever the residue number increases or the
// insertion code changes. Residues that convert_resid() maps to 'X' are not
// written. Unless ONLY_OUTPUT_SEQUENCE_FILES is set, a header line is written
// and the sequence is broken up with a space after every 10 letters and a line
// break after every 60.
//
// Fix: an empty atom list no longer reads pdbfile[0]; only the note/header and
// the final newline are written in that case. RANGE2 is no longer indexed past
// its end when it is shorter than RANGE1.
void PDBfile::output_resid(char chain, string &fileout, vector<int> &RANGE1, vector<int> &RANGE2)
{
   ofstream outfile;
   outfile.open(fileout.c_str());
   if(!outfile.is_open()){
      return;
   }

   if(RANGE1.size()>0){
      outfile << "# Note: residue range is restricted to: ";
      for(unsigned int i=0; i<RANGE1.size() && i<RANGE2.size(); i++){
         outfile << " [" << RANGE1[i] << "," << RANGE2[i] << "]";
      }
      outfile << endl;
   }
   if(!ONLY_OUTPUT_SEQUENCE_FILES){
      outfile << ">chain_" << chain << " " << endl;
   }

   if(pdbfile.size()>0){
      int resnum = pdbfile[0].res_num;
      char inscode = pdbfile[0].ins_code;
      char resid_code = convert_resid(pdbfile[0].resid);

      int j=0;    //letters written so far
      int k=0;    //groups of 10 letters completed so far
      if(resid_code!='X'){
         outfile << resid_code;
         j=1;
      }

      //atom 0 was handled above, so start from the second atom
      for(unsigned int i=1; i<pdbfile.size(); i++){
         if(resnum < pdbfile[i].res_num || inscode != pdbfile[i].ins_code){
            resnum = pdbfile[i].res_num;
            inscode = pdbfile[i].ins_code;
            resid_code = convert_resid(pdbfile[i].resid);
            if(resid_code!='X'){
               outfile << resid_code;
               if(!ONLY_OUTPUT_SEQUENCE_FILES){
                  j++;
                  if(j%10==0){
                     k++;
                     if(k%6==0){
                        outfile << endl;
                     }
                     outfile << " ";
                  }
               }
            }
         }
      }
   }

   outfile << endl;
   outfile.close();

   return;
}

// Returns the one-letter residue sequence of the stored atoms.
// A new residue is recognised whenever the residue number increases or the
// insertion code changes; residues that convert_resid() maps to 'X' are left
// out. An empty atom list gives an empty string.
//
// Fix: the first residue used to be appended twice (once conditionally and once
// unconditionally, the latter even when it was 'X'); it is now appended once,
// under the same rule as every other residue. The empty case no longer reads
// pdbfile[0].
string PDBfile::get_sequence()
{
    string result;
    if(pdbfile.size()==0){
        return result;
    }

    int resnum = pdbfile[0].res_num;
    char inscode = pdbfile[0].ins_code;

    char resid_code = convert_resid(pdbfile[0].resid);
    if(resid_code!='X'){
        result.push_back(resid_code);
    }

    for(unsigned int i=1; i<pdbfile.size(); i++){
        if(resnum < pdbfile[i].res_num || inscode != pdbfile[i].ins_code){
            resnum = pdbfile[i].res_num;
            inscode = pdbfile[i].ins_code;
            resid_code = convert_resid(pdbfile[i].resid);
            if(resid_code!='X'){
                result.push_back(resid_code);
            }
        }
    }
    return result;
}

// Maps a 3-character residue name to its one-letter code.
// Recognised names: the 20 standard amino acids, MSE (reported as 'M'), and the
// right-justified nucleotide names "  A", "  C", "  G", "  T", "  U" together
// with their lower-case forms (returned unchanged in case).
// Any other name, including the empty string, yields 'X'. The comparison is
// exact and case-sensitive.
char convert_resid(char *resid)
{
    struct resid_entry {
        const char *name;
        char code;
    };
    static const resid_entry table[] = {
        {"ALA",'A'}, {"ARG",'R'}, {"ASN",'N'}, {"ASP",'D'}, {"CYS",'C'},
        {"GLU",'E'}, {"GLN",'Q'}, {"GLY",'G'}, {"HIS",'H'}, {"ILE",'I'},
        {"LEU",'L'}, {"LYS",'K'}, {"MET",'M'}, {"PHE",'F'}, {"PRO",'P'},
        {"SER",'S'}, {"THR",'T'}, {"TRP",'W'}, {"TYR",'Y'}, {"VAL",'V'},
        {"MSE",'M'},
        {"  A",'A'}, {"  C",'C'}, {"  G",'G'}, {"  T",'T'}, {"  U",'U'},
        {"  a",'a'}, {"  c",'c'}, {"  g",'g'}, {"  t",'t'}, {"  u",'u'}
    };
    const unsigned int entries = sizeof(table)/sizeof(table[0]);

    for(unsigned int i=0; i<entries; i++){
        if(strcmp(resid,table[i].name)==0){
            return table[i].code;
        }
    }
    return 'X';
}

void PDBfile::rename_resnum(vector<res_corresp> &result1, unsigned short fraglen)
//renames residue numbers to expand insertion codes so that ALL residues are
//identified by a unique integer.
//the correspondence between the new residue numbering and the original is
//preserved and returned (appended to result1; result1 is not cleared first).
//the actual char insertion codes are unchanged.
//the new numbers are zero-based positions in order of appearance,
//e.g. residues {1 ,1A,1B,2 } get res_num {0 ,1 ,2 ,3 } before gap offsets are applied.
//wherever the C atom of one residue is more than 2.0 (presumably Angstrom -
//TODO confirm units) from the N atom of the next, the numbering of all later
//residues is shifted up by 'fraglen', and 'fraglen' placeholder entries with
//res = -1 are inserted into result1 at that point.
//does nothing (result1 untouched) if no atoms are stored.
{  
    res_corresp temp;
    vector<res_corresp> result;         //original (res_num, ins_code) per new index, without gap placeholders
    if(pdbfile.size()==0){return;}
    int idx=0;                          //new (zero-based) index of the residue currently being processed
    vector<coord> Ccrd;                 //coordinates of the " C  " atom, indexed by new residue index
    vector<coord> Ncrd;                 //coordinates of the " N  " atom, indexed by new residue index
    vector<double> CNdist;              //unused in this function
    vector<int> gap;
    vector<int> offset;
    coord tmpcrd;
    
    //the first atom defines residue 0
    temp.res = pdbfile[0].res_num;
    temp.ins = pdbfile[0].ins_code;
    result.push_back(temp);
    
    //pass 1: rename to account for insertion codes; get coords of N and C atoms
    for(unsigned int i=0; i<pdbfile.size(); i++){
        //cout << endl << i << " " << pdbfile[i].res_num << pdbfile[i].ins_code << " " << pdbfile[i].atom;
        //a change of residue number starts a new residue...
        if(pdbfile[i].res_num != result[idx].res){
            result.push_back(temp);
            idx++;
            result[idx].res = pdbfile[i].res_num;
            result[idx].ins = pdbfile[i].ins_code;
            //cout << " * " << idx << " " << result[idx].res << result[idx].ins;
        }
        //...and so does a change of insertion code with the residue number unchanged.
        //(if the block above fired, result[idx].ins already equals this atom's code, so this one does not.)
        if(pdbfile[i].ins_code != result[idx].ins){
            result.push_back(temp);
            idx++;
            result[idx].res = pdbfile[i].res_num;
            result[idx].ins = pdbfile[i].ins_code;
            //cout << " $ " << idx << " " << result[idx].res << result[idx].ins;
        }
        //cout << "  " << idx;
        
        //grow the coordinate vectors up to the current residue, then store this atom's position
        if(strcmp(pdbfile[i].atom," C  ")==0){
            while((int)Ccrd.size() <= idx){
                Ccrd.push_back(tmpcrd);			//tmpcrd is blank
            }
            Ccrd[idx] = pdbfile[i].crd;
        }
        if(strcmp(pdbfile[i].atom," N  ")==0){
            while((int)Ncrd.size() <= idx){
                Ncrd.push_back(tmpcrd);			//tmpcrd is blank
            }
            Ncrd[idx] = pdbfile[i].crd;
        }
        
        //from here on res_num holds the new zero-based residue index
        pdbfile[i].res_num = idx;
        //cout << endl;
    }
    
    //get distances between residues
    idx=0;                              //reused as the running offset (multiples of fraglen)
    for(unsigned int i=1; i<result.size(); i++){
        while(gap.size() <= i){
            gap.push_back(-1);
            offset.push_back(idx);
        }
        gap[i]=0;
        //only test the C(i-1)..N(i) distance when both vectors are long enough to hold those entries
        if(Ccrd.size()>=i && Ncrd.size()>i){
            tmpcrd = Ccrd[i-1]-Ncrd[i];
            if(sqrt(dist2(tmpcrd)) > 2.0){
                idx+=fraglen;
                gap[i]=1;
            }
        }
        offset[i]=idx;
        //cout << endl << i << " " << result[i].res << "  " << dist(tmpcrd) << " " << gap[i] << " " << offset[i];
    }
    //the gap vector indicates where a gap (non-consecutive residues) occurs, according to proximity checking
    //the offset vector specifies how many residues the renamed-pdbfile needs to be offset accordingly
    
    //adjust the result vector by adding 'fraglen' empty entries
    res_corresp empty;
    empty.res = -1;                     //marks a placeholder; empty.ins is not assigned here
    result1.push_back(result[0]);
    for(unsigned int i=1; i<result.size(); i++){
        if(gap[i]>0){
            for(unsigned int j=0; j<fraglen; j++){
                result1.push_back(empty);
            }
        }
        result1.push_back(result[i]);
    }
    /*for(unsigned int i=0; i<result1.size(); i++){
     cout << endl << i << " " << result1[i].res;
     }*/
    
    //pass 2: rename to account for residues that are non-consecutive - this is done by proximity checking
    for(unsigned int i=0; i<pdbfile.size(); i++){
        //cout << endl << i << " " << pdbfile[i].res_num << pdbfile[i].ins_code << " " << pdbfile[i].atom;
        //residue 0 is never shifted, and offset is only filled for indices >= 1 when there is more than one residue
        if(pdbfile[i].res_num > 0){
            //cout << "  " << offset[pdbfile[i].res_num];
            if(offset[pdbfile[i].res_num]>0){
                pdbfile[i].res_num += offset[pdbfile[i].res_num];
                //cout << " * " << pdbfile[i].res_num;
            }
        }
    }
    
    return;
}

double PDBfile::get_distance(int i, int j)
{
    coord temp = pdbfile[i].crd-pdbfile[j].crd;
    return dist(temp);
}

// Returns the arithmetic mean of the B-factor fields of the atoms at indices
// i and j (no bounds check).
double PDBfile::get_av_bfact(int i, int j)
{
    return (pdbfile[i].bfact+pdbfile[j].bfact)/2.0;
}

vector<coord> PDBfile::av_pos()
//returns a vector containing the average positions of residues (using all atoms in a residue)
{
    vector<coord> result;
    vector<Coords> temp;
    Coords empty;
    coord empty1;
	
    for(unsigned int i=0; i<pdbfile.size(); i++){
        while((int)temp.size()<=pdbfile[i].res_num){
			temp.push_back(empty);
		}
        temp[pdbfile[i].res_num].add(pdbfile[i].crd);
    }
    
    for(unsigned int i=0; i<temp.size(); i++){
        if(temp[i].size()>0){
            result.push_back(temp[i].mean());
		} else {
			result.push_back(empty1);
		}
    }
	
    return result;
}

// Returns the largest residue number among the stored atoms, or 0 when there
// are no atoms or every residue number is 0 or negative.
int PDBfile::get_highest_resnum()
{
    int highest = 0;
    unsigned int pos = 0;
    while(pos < pdbfile.size()){
        const int candidate = pdbfile[pos].res_num;
        if(highest < candidate){
            highest = candidate;
        }
        ++pos;
    }
    return highest;
}

void write_original_res(string file, vector<res_corresp> &orig)
{
	ofstream outfile;
	
	outfile.open(file.c_str());
	if(outfile.is_open()){
		for(unsigned int i=0; i<orig.size(); i++){
			outfile << orig[i].res << "\t" 
			<< orig[i].ins << "\n";
		}
		outfile.close();
	} else {
		cout << endl << endl << "Error - cannot open " << file << " for writing." << endl << endl;
		exit(-1);
	}
	return;
}

// Reads a residue correspondence list in the "<res>\t<ins>\n" layout produced
// by write_original_res and returns the entries in file order.
// Reading stops at end of file; a trailing fragment with no tab is ignored.
//
// Fix: the stream is now checked. Previously a file that could not be opened
// never reached eof(), so the loop ran forever while appending entries. In line
// with the other readers and writers in this file, an unopenable file now
// prints an error and terminates the program (exit(-1)); any later read failure
// simply ends the loop.
vector<res_corresp> read_original_res(string file)
{
    ifstream infile(file.c_str());
    if(!infile){
        cout << endl << endl << "Error - cannot open " << file << " for reading." << endl << endl;
        exit(-1);
    }

    res_corresp temp;
    vector<res_corresp> result;

    string tmp;
    while(getline(infile,tmp,'\t')){
        if(infile.eof()){break;}        //no tab before end of file: not a full entry
        temp.res = str_to_int(tmp);

        getline(infile,tmp,'\n');
        temp.ins = tmp.empty() ? '\0' : tmp[0];

        result.push_back(temp);
    }

    return result;
}
