PDB Parser Code
#!/usr/bin/perl
#############################################################################
# File Name: pdbparser.pl
# Contents: PDBParser will convert a Bioinformatics's PDB file
# format to xml format, based on a supplied DTD.
#
# Author: Catalina Price
# Creation Date: 20040308
# Code Origination: Catalina Price
# Origination Date: 20040308
# Last Modified by: Catalina Price
# Modification Date: 20040315
# Project: URI(tm) Universal Research Interchange Format
# Legal: Copyright (C) 2004, URI, Bioinformatics, CSC592
#############################################################################
# Current draw backs in reading a DTD:
# 1. The pdbparser does not handle NameSpaces
# 2. The pdbparser does not handle the shorthand of the Element Tag, Ex.
# 3. The pdbparser does not handle multiple attributes
# placed in a single !ATTLIST tag.
##########################################################
# Setup local system syntax variables.
##########################################################
use strict;
use Cwd;
#use File::Copy;
#use File::Path;
use IO::Handle; # NT appears to not be autoflushing
STDERR->autoflush(1); # STDERR, so we do it explicitly.
##########################################################
# Prototypes.
##########################################################
sub PrintVersion;
sub GetFileName;
sub ReadDTDFile;
sub BuildDTDTree;
sub ReviewDTDTreeForErrors;
sub BuildNode;
sub PrintElementReport;
sub PDB1LetterSeqs;
#########################################
# Tools Section
#########################################
sub GetUserResponse;
sub PrintFormatList;
##########################################################
# Internal Global variables and flags. READ ONLY
##########################################################
# File and Directory Names
##########################
#my $FileName = "DTD_URI.dtd";
#my $FileName = "test-attr.dtd";
my $FileNamePDB = "";
my $FileNameDTD = "";
my $g_PDBParserVersion = "2.0-1";#PDBParser Version control number.
my %g_DeclarationTypes = (
'Element' => ' or
'Attribute' => '
'Entity' => '
);
#########################
# Hash keys and Values.
#########################
my $PDB_EXT = "ent";
my $DTD_EXT = "dtd";
my $DTD_LINENUM = "DTDLine:";
## Hash Entry Types - %ElementTreeNode
my $ELEMENT_NAME = 'ElementName';
my $ELEMENT_CATAGORY = 'ElementCatagory';
my $ELEMENT_LINE_NUM = 'ElementLineNum';
my $ref_PARENTNODE = 'refParentNode';
my $ref_CHILDNODE_LIST = 'refChildNodeList';
my $ref_ATTRIBUTE_LIST = 'refAttributeList';
## Element Catagory (s)
my $ELEM_CAT_LIST = 'LIST';
my $ELEM_CAT_PND_PCDATA = '#PCDATA';
my $ELEM_CAT_ANY = 'ANY';
my $ELEM_CAT_EMPTY = 'EMPTY';
## ATTRIB Value types Explanation
my $ATTRIB_TYPE_CDATA = 'CDATA'; #The value is character data.
my $ATTRIB_TYPE_ENUM = 'ENUM'; #The value must be one from an enumerated list.
my $ATTRIB_TYPE_ID = 'ID'; #The value is a unique id.
my $ATTRIB_TYPE_IDREF = 'IDREF'; #The value is the id of another element.
my $ATTRIB_TYPE_IDREFS = 'IDREFS'; #The value is a list of other ids.
my $ATTRIB_TYPE_NMTOKEN = 'NMTOKEN'; #The value is a valid XML name.
my $ATTRIB_TYPE_NMTOKENS = 'NMTOKENS'; #The value is a list of valid XML names.
my $ATTRIB_TYPE_ENTITY = 'ENTITY'; #The value is an entity.
my $ATTRIB_TYPE_ENTITIES = 'ENTITIES'; #The value is a list of entities.
my $ATTRIB_TYPE_NOTATION = 'NOTATION'; #The value is a name of a notation.
my $ATTRIB_TYPE_XML = 'xml:'; #The value is a predefined xml value
## ATTRIB Default Value types
my $ATTRIB_DEFVALUE_REQUIRED = '#REQUIRED';#The attribute value must be included in the element
my $ATTRIB_DEFVALUE_IMPLIED = '#IMPLIED'; #The attribute does not have to be included
my $ATTRIB_DEFVALUE_FIXED = '#FIXED'; #value ??????????? The attribute value is fixed
## ERROR Types
my $g_ERROR_DTD_NOERROR = 0;
my $g_ERROR_DTD_READ = 1;
my $g_ERROR_DTD_BUILD = 2;
my $g_DTDTreeRoot = "ROOT";
my $g_RootElement = "";
my $g_RootLineNum = 0;
my @g_ElementDeclarationList = ();
my @g_ElementNameDeclarationList = ();
my @g_RealizedAssociatedElementNameList = ();
my $g_RealizedAssociatedElementNameCount = 0;
my $g_ProclaimedAssociatedElementCount = 0;
my @g_AttributeDeclarationList = ();
my @g_AttributeNameDeclarationList = (); #Not really used.!!!!!!!
my $g_AttributesAssociatedWithElementCount = 0;
my @g_AttributesAssociatedWithElements = ();
my @g_EntityNameList = ();
my @g_EntityDeclarationList = ();
##########################################################
# Debugging variable.
# 1 = TRUE Sends all output to sreen.
# 0 = FALSE Sends all output to logfile
##########################################################
my $bRunTimeDebug = 0;
##########################################################
exit(!MainFunction());
#########################################################
# Main()
#########################################################
sub MainFunction{
my %DTDTree = ($ELEMENT_NAME => '',
$ELEMENT_CATAGORY => '',
$ELEMENT_LINE_NUM => '',
$ref_PARENTNODE => '',
$ref_CHILDNODE_LIST => '',
$ref_ATTRIBUTE_LIST => '');
my @EmptyAttributeList = ();
my $StatusResult = "";
my @StatusResultMsg = ();
my $ReturnStatus = "";
PrintVersion();
$FileNamePDB = GetFileName($PDB_EXT);
$FileNameDTD = GetFileName($DTD_EXT);
$DTDTree{$ref_ATTRIBUTE_LIST} = \@EmptyAttributeList;
($StatusResult, @StatusResultMsg) = ReadDTDFile($FileNameDTD);
if ( 0 == $StatusResult){
$ReturnStatus = $StatusResult;
ReviewDTDTreeForErrors($g_ERROR_DTD_READ, @StatusResultMsg);
STDERR->print("PDB Parser has exited due to DTD errors.\n");
return($ReturnStatus);
}#if
$DTDTree{$ELEMENT_NAME} = $g_RootElement;
$DTDTree{$ELEMENT_LINE_NUM} = $g_RootLineNum;
$DTDTree{$ref_PARENTNODE} = \$g_DTDTreeRoot;
$g_ProclaimedAssociatedElementCount++;
($StatusResult, @StatusResultMsg) = BuildDTDTree(\%DTDTree);
if ( 0 == $StatusResult){
$ReturnStatus = $StatusResult;
ReviewDTDTreeForErrors($g_ERROR_DTD_BUILD, @StatusResultMsg);
STDERR->print("PDB Parser has exited due to DTD errors.\n");
return($ReturnStatus);
}#if
if (0 == ($StatusResult = ReviewDTDTreeForErrors($g_ERROR_DTD_NOERROR, @StatusResultMsg))){
STDERR->print("PDB Parser has exited due to DTD errors.\n");
return($ReturnStatus = $StatusResult);
}#if
PDB1LetterSeqs($FileNamePDB);
#.....
# print("\nList of Elements.\n");
# PrintFormatList(80, "Elem: ", @g_ElementNameDeclarationList);
# print("\nList of Attributes.\n");
# PrintFormatList(80, "Attr: ", @g_AttributeNameDeclarationList);
# print("\nList of Entities.\n");
# PrintFormatList(80, "Attr: ", @g_EntityNameList);
# PrintFormatList(80, "Raw Element List: ", @g_ElementDeclarationList);
# my $tmpOutput = "";
# foreach $tmpOutput (@g_ElementDeclarationList){
# print("Raw Element List: $tmpOutput\n");
# }#foreach
# print("\nList of Attributes. @g_AttributeDeclarationList\n");
# print("\nList of Attributes. @g_EntityDeclarationList\n");
STDERR->print("\nPDB Parser has completed with Success.\n");
$ReturnStatus = !$StatusResult;
return($ReturnStatus);
}# MainFunction()
############################################################################
# FUNCTION: PrintVersion()
# PURPOSE: Prints Version information about PDBParser.
# PARAMETERS: None.
# RETURN: None.
############################################################################
sub PrintVersion{
my $FormatStr = "";
if ($bRunTimeDebug){STDERR->print("\nDEBUG INFO: Entering function PrintVersion()\n");}
$FormatStr = sprintf ("%-10s", $g_PDBParserVersion);
STDERR->print("****************************************************************************************\n");
STDERR->print("*** PDBParser ***\n");
STDERR->print("*** Version: $FormatStr ***\n");
STDERR->print("*** ***\n");
STDERR->print("*** PDBParser will convert a Bioinformatics's PDB file format to xml format, based ***\n");
STDERR->print("*** on a supplied DTD. ***\n");
STDERR->print("*** ***\n");
STDERR->print("*** Project: URI(tm) Universal Research Interchange Format ***\n");
STDERR->print("*** ***\n");
STDERR->print("*** Legal: Copyright (C) 2004, URI, Bioinformatics, CSC592 ***\n");
STDERR->print("*** ***\n");
STDERR->print("****************************************************************************************\n");
if ($bRunTimeDebug){STDERR->print("\nDEBUG INFO: Exiting function PrintVersion()\n");}
}#PrintVersion()
################################################################################################
########### ###########
########### DTD Section ###########
########### ###########
################################################################################################
############################################################################
# FUNCTION: ReadDTDFile()
# PURPOSE: Reads the DTD data structure.
# PARAMETERS: None.
#
# RETURN: TRUE for success, otherwise FALSE.
############################################################################
sub ReadDTDFile{
my $FileName = "";
my $Line = "";
my $LineCopy = "";
my $TagType = "";
my $ElementName = "";
my $ElementDeclaration = "";
my $TmpElementName = "";
my $AttributeName = "";
my $AttributeDeclaration = "";
my $TmpAttributeName = "";
my @TmpAttributeList = ();
my $CurrentAttributeLine = "";
my $EntityName = "";
my $EntityDeclaration = "";
my $EntityName = "";
my $EntityValue = "";
my $bLookingForFirstElement = 1; #Must be True.
my $bFoundDuplicate = 0; #Must be False.
my $ReturnStatus = 1; #Must be True.
my @returnStatusMsg = ();
my $LineCount = 0; #Must be zero.
my $LineNum = 0;
my $TmpLine = "";
($FileName) = @_;
if ($bRunTimeDebug){STDERR->print("\nDEBUG INFO: Entering function ReadDTDFile()\n");}
if (open(FILE,"$FileName")){
while ($Line = ){
$Line =~ s/\s+$//;
$LineCount++;
SWITCH: {
if ( $Line =~ /^$g_DeclarationTypes{Element}/ ){
## Remove Tag Information
$ElementDeclaration = $Line;
$ElementDeclaration =~ s/^$g_DeclarationTypes{Element}//;
$ElementDeclaration =~ s/^\s+//;
($ElementDeclaration)=split(/\>/,$ElementDeclaration);
$ElementDeclaration =~ s/\s+$//;
$ElementDeclaration =~ s/\s+/ /g;
## Get Element Name
$ElementName = $ElementDeclaration;
$ElementDeclaration = "$ElementDeclaration $DTD_LINENUM $LineCount";
($ElementName) = split(/\(/,$ElementName);
($ElementName) = split(/\s+/,$ElementName);
$ElementName =~ s/\s+$//;
## Ensure no duplicates
if( !(grep /^$ElementName$/, @g_ElementNameDeclarationList) ){
if ($bRunTimeDebug){print("FOUND NEW ELEMENT: $ElementDeclaration\n");}
push @g_ElementNameDeclarationList, $ElementName;
push @g_ElementDeclarationList, $ElementDeclaration;
if ($bLookingForFirstElement){
$g_RootElement = $ElementName;
$g_RootLineNum = $LineCount;
$bLookingForFirstElement = 0;
}#if
}else{
($TmpLine, $LineNum) = split(/$DTD_LINENUM\s+/,$ElementDeclaration);
push @returnStatusMsg, "Duplicate Element ($LineNum): $TmpLine\n";
$ReturnStatus = 0;
}#else
last SWITCH;
}#if
if ( $Line =~ /^$g_DeclarationTypes{Attribute}/ ){
## Remove Tag Information
$AttributeDeclaration = $Line;
$AttributeDeclaration =~ s/^$g_DeclarationTypes{Attribute}//;
$AttributeDeclaration =~ s/^\s+//;
($AttributeDeclaration)=split(/\>/,$AttributeDeclaration);
$AttributeDeclaration =~ s/\s+$//;
$AttributeDeclaration =~ s/\s+/ /g;
## Get Element and Attribute Name
($ElementName, $AttributeName)=split(/\s+/,$AttributeDeclaration);
$AttributeDeclaration = "$AttributeDeclaration $DTD_LINENUM $LineCount";
## Ensure no duplicates
$bFoundDuplicate = 0;
@TmpAttributeList = grep /^$ElementName/, @g_AttributeDeclarationList;
foreach $CurrentAttributeLine (@TmpAttributeList) {
($TmpElementName, $TmpAttributeName) = split(/\s+/,$CurrentAttributeLine);
$TmpElementName =~ s/^\s+//;
$TmpElementName =~ s/\s+$//;
if (($ElementName eq $TmpElementName) && ($AttributeName eq $TmpAttributeName)){
($TmpLine, $LineNum) = split(/$DTD_LINENUM\s+/,$CurrentAttributeLine);
push @returnStatusMsg, "Duplicate Attribute ($LineNum): $TmpLine\n";
$ReturnStatus = 0;
$bFoundDuplicate = 1;
}#if
}#foreach
if(!($bFoundDuplicate)){
push @g_AttributeNameDeclarationList, $AttributeName;
if ($bRunTimeDebug){print("FOUND NEW ATTRIBUTE: \"$AttributeDeclaration\"\n");}
push @g_AttributeDeclarationList, $AttributeDeclaration;
}#if
last SWITCH;
}#if
if ( $Line =~ /^$g_DeclarationTypes{Entity}/ ){
## Remove Tag Information
$EntityDeclaration = $Line;
$EntityDeclaration =~ s/^$g_DeclarationTypes{Entity}//;
$EntityDeclaration =~ s/^\s+//;
($EntityDeclaration)=split(/\>/,$EntityDeclaration);
$EntityDeclaration =~ s/\s+$//;
$EntityDeclaration =~ s/\s+/ /g;
## Get Entity Name
($EntityName)=split(/\s+/,$EntityDeclaration);
$EntityDeclaration = "$EntityDeclaration $DTD_LINENUM $LineCount";
## Ensure no duplicates
if( !(grep /^$EntityName$/, @g_EntityNameList)){
push @g_EntityNameList, $EntityName;
if ($bRunTimeDebug){print("FOUND NEW ENTITY: \"$EntityDeclaration\"\n");}
push @g_EntityDeclarationList, $EntityDeclaration;
}else{
($TmpLine, $LineNum) = split(/$DTD_LINENUM\s+/,$EntityDeclaration);
push @returnStatusMsg, "Duplicate Entity ($LineNum): $TmpLine\n";
$ReturnStatus = 0;
if ($bRunTimeDebug){print("ERROR INFO: Duplicate Entity: \"$EntityName\"\n");}
}#else
last SWITCH;
}#if
}#SWITCH
}#while
close FILE;
}#if
if ($bRunTimeDebug){STDERR->print("\nDEBUG INFO: Exiting function ReadDTDFile()\n");}
return ($ReturnStatus, @returnStatusMsg);
}#ReadDTDFile()
############################################################################
# FUNCTION: BuildDTDTree()
# PURPOSE: Build a tree of the DTD.
# PARAMETERS: $refCurrentElementNode - [IN] A hash reference containing a CurrentElementNode hash.
#
# RETURN: TRUE for success, otherwise FALSE.
############################################################################
sub BuildDTDTree{
my $refCurrentElementNode = 0;
my %CurrentElementNode = [];
my @SubElementDeclarationList = ();
my $TmpElementLine = "";
my $TmpLine = "";
my $TmpElementName = "";
my $Junk = "";
my $ParentElementName = "";
my $ParentLineNum = "";
my $CurrentElementName = "";
my $CurrentCategory = "";
my $LineNum = "";
my @CurrentChildNodeList = ();
my @TmpCurrentChildNodeList = ();
my $TmpChildNode = "";
my @TmpAttributeList = ();
my $TmpAttributeLine = "";
my @CurrentAttributeList = ();
my $bStopSearch = 0; # Must be FALSE.
my $StatusResult = 0;
my @StatusResultMsg = ();
my @returnStatusMsg = ();
my $FormatMsg = "";
($refCurrentElementNode) = @_;
if ($bRunTimeDebug){STDERR->print("\nDEBUG INFO: Entering function BuildDTDTree()\n");}
%CurrentElementNode = %{$refCurrentElementNode};
$CurrentElementName = $CurrentElementNode{$ELEMENT_NAME};
#Get the list of attributes for the current element, if any.
@TmpAttributeList = grep /^$CurrentElementName/, @g_AttributeDeclarationList;
foreach $TmpAttributeLine (@TmpAttributeList){
($TmpAttributeLine, $LineNum) = split(/$DTD_LINENUM\s+/,$TmpAttributeLine);
($TmpElementName, $Junk) = split(/\s+/,$TmpAttributeLine);
$TmpElementName =~ s/^\s+//;
$TmpElementName =~ s/\s+$//;
if ($TmpElementName eq $CurrentElementName){
$g_AttributesAssociatedWithElementCount++;
if ($bRunTimeDebug){print("FOUND ATTRIBUTE: Current Attribute line: $TmpAttributeLine\n");}
push @g_AttributesAssociatedWithElements, $TmpAttributeLine;
$TmpAttributeLine =~ s/^$CurrentElementName//;
$TmpAttributeLine =~ s/^\s+//;
push @CurrentAttributeList, $TmpAttributeLine;
}#if
}#foreach
$CurrentElementNode{$ref_ATTRIBUTE_LIST} = \@CurrentAttributeList;
#Should always be only one since an integrity check has been done.
@SubElementDeclarationList = grep /^$CurrentElementName/, @g_ElementDeclarationList;
if ( 0 == scalar(@SubElementDeclarationList)) {
$ParentElementName = ${%{$CurrentElementNode{$ref_PARENTNODE}}}{$ELEMENT_NAME};
$ParentLineNum = ${%{$CurrentElementNode{$ref_PARENTNODE}}}{$ELEMENT_LINE_NUM};
$FormatMsg = "Proclaimed Child ($ParentLineNum): Parent Element \"$ParentElementName\" " .
"proclaimed a child Element \"$CurrentElementName\"\n";
push @returnStatusMsg, $FormatMsg;
$ParentLineNum =~ s/\S/ /g;
$FormatMsg = " $ParentLineNum but none exist in the Element Declaration list.\n";
push @returnStatusMsg, $FormatMsg;
return(0, @returnStatusMsg);
}else{
if ($bRunTimeDebug){print("New List of Elements lines: @SubElementDeclarationList\n");}
while (($TmpElementLine = shift(@SubElementDeclarationList)) && (!$bStopSearch)){
($TmpElementLine, $LineNum) = split(/$DTD_LINENUM\s+/,$TmpElementLine);
$TmpLine = $TmpElementLine;
($TmpLine, $Junk) = split(/\(/,$TmpLine);
if ($bRunTimeDebug){print("Split Line: \"$TmpLine\", \"$Junk\" \n");}
($TmpElementName, $Junk) = split(/\s+/,$TmpLine);
$TmpElementName =~ s/\s+$//;
if ($bRunTimeDebug){print("Split Line: \"$TmpElementName\", \"$Junk\" \n");}
if ($TmpElementName eq $CurrentElementName){
$g_RealizedAssociatedElementNameCount++;
$CurrentElementNode{$ELEMENT_LINE_NUM} = $LineNum;
my $TmpBuffer = $TmpElementName; #???????????????? is my needed??????????
push @g_RealizedAssociatedElementNameList, $TmpBuffer;
if ($bRunTimeDebug){print("FOUND ELEMENT: Current Element line: $TmpElementLine\n");}
$TmpElementLine =~ s/^$CurrentElementName//;
$TmpElementLine =~ s/^\s+//;
$TmpElementLine =~ s/^\(//;
$TmpElementLine =~ s/^\s+//;
$TmpElementLine =~ s/\s+$//;
$TmpElementLine =~ s/\)$//;
$TmpElementLine =~ s/\s+$//;
$CurrentCategory = $TmpElementLine;
$bStopSearch = 1;
}#if
}#while
}#else
if ($bRunTimeDebug){STDERR->print("\nDEBUG INFO: CurrentCategory: $CurrentCategory\n");}
SWITCH: {
if ($CurrentCategory eq $ELEM_CAT_PND_PCDATA){
if ($bRunTimeDebug){STDERR->print("\nDEBUG INFO: Element: $CurrentElementName Catagory: $CurrentCategory\n");}
$CurrentElementNode{$ELEMENT_CATAGORY} = $ELEM_CAT_PND_PCDATA;
PrintElementReport(\%CurrentElementNode);
last SWITCH;
}#if
if ($CurrentCategory eq $ELEM_CAT_ANY){
if ($bRunTimeDebug){STDERR->print("\nDEBUG INFO: Element: $CurrentElementName Catagory: $CurrentCategory\n");}
$CurrentElementNode{$ELEMENT_CATAGORY} = $ELEM_CAT_ANY;
PrintElementReport(\%CurrentElementNode);
last SWITCH;
}#if
if ($CurrentCategory eq $ELEM_CAT_EMPTY){
if ($bRunTimeDebug){STDERR->print("\nDEBUG INFO: Element: $CurrentElementName Catagory: $CurrentCategory\n");}
$CurrentElementNode{$ELEMENT_CATAGORY} = $ELEM_CAT_EMPTY;
PrintElementReport(\%CurrentElementNode);
last SWITCH;
}#if
default:{
if ($bRunTimeDebug){
STDERR->print("\nDEBUG INFO: Element: $CurrentElementName Catagory: $CurrentCategory\n");}
$CurrentElementNode{$ELEMENT_CATAGORY} = $ELEM_CAT_LIST;
(@CurrentChildNodeList) = split(/,/,$CurrentCategory);
@TmpCurrentChildNodeList = @CurrentChildNodeList;
@CurrentChildNodeList = ();
foreach $TmpChildNode (@TmpCurrentChildNodeList) {
$TmpChildNode =~ s/^\s+//;
$TmpChildNode =~ s/\s+$//;
push @CurrentChildNodeList, $TmpChildNode;
}#foreach
if ($bRunTimeDebug){STDERR->print("\nDEBUG INFO: ChildNodeList: @CurrentChildNodeList\n");}
$CurrentElementNode{$ref_CHILDNODE_LIST} = \@CurrentChildNodeList;
PrintElementReport(\%CurrentElementNode);
foreach $TmpChildNode (@CurrentChildNodeList) {
$TmpChildNode =~ s/^\s+//;
$TmpChildNode =~ s/\s+$//;
$TmpChildNode =~ s/\+$//;
$TmpChildNode =~ s/\*$//;
$TmpChildNode =~ s/\?$//;
$g_ProclaimedAssociatedElementCount++;
my %NewElementNode = ($ELEMENT_NAME => '',
$ELEMENT_CATAGORY => '',
$ELEMENT_LINE_NUM => '',
$ref_PARENTNODE => '',
$ref_CHILDNODE_LIST => '',
$ref_ATTRIBUTE_LIST => '');
my @EmptyAttributeList = ();
$NewElementNode{$ELEMENT_NAME} = $TmpChildNode;
$NewElementNode{$ref_PARENTNODE} = \%CurrentElementNode;
$NewElementNode{$ref_ATTRIBUTE_LIST} = \@EmptyAttributeList;
($StatusResult, @StatusResultMsg) = BuildDTDTree(\%NewElementNode);
if ( 0 == $StatusResult){
push @returnStatusMsg, @StatusResultMsg;
return(0, @returnStatusMsg);
}#if
$CurrentElementNode{$TmpChildNode} = \%NewElementNode;
}#foreach
last SWITCH;
}#default
}#SWITCH
%{$refCurrentElementNode} = %CurrentElementNode;
if ($bRunTimeDebug){STDERR->print("\nDEBUG INFO: Exiting function BuildDTDTree()\n");}
return(1, ());
}#BuildDTDTree()
############################################################################
# FUNCTION: ReviewDTDTreeForErrors()
# PURPOSE: Ensure all declared Elements, Attribute, and Entities have
# been incorporated into the DTD tree.
# PARAMETERS: $ - [IN]
#
# RETURN: None.
############################################################################
sub ReviewDTDTreeForErrors{
my $ref_ErrorMsg = 0;
my $ElementDeclarationCount = 0;
my $TmpElementLine = "";
my $TmpAssociatedAttributeLine = "";
my $TmpElementName = 0;
my $AttributeDeclarationCount= 0;
my $TmpAttributeName = "";
my $AttributeAssociatedCount = 0;
my $TmpAttributeLine = 0;
my $OrphanElementCount = 0;
my $OrphanAttributeCount = 0;
my $EntityCount = 0;
my $TmpElementName = "";
my $TmpAssocElementName = "";
my $TmpAssocAttributeName = "";
my $Junk = 0;
my @AttributeWithUndeclaredElementList = ();
my @AttributeWithUnAssociatedAncestorList = ();
my @SubAssociatedAttributeList = ();
my @TmpElementList = ();
my @TmpElementNameList = ();
my @TmpAttributeList = ();
my $TmpLine = "";
my $bStopPrinting = 0; #MUST BE FALSE
my $bFoundInList = 0;
my $PrintCount = 0;
my $ErrorStatus = 0;
my @Msg = ();
my $MsgLine = "";
my $LineNum = 0;
my $ReturnStatus = 1; #MUST BE TRUE
($ErrorStatus, @Msg) = @_;
$ElementDeclarationCount = scalar (@g_ElementDeclarationList);
$AttributeDeclarationCount = scalar (@g_AttributeDeclarationList);
$EntityCount = scalar (@g_EntityDeclarationList);
$OrphanElementCount = $ElementDeclarationCount - $g_RealizedAssociatedElementNameCount;
$OrphanAttributeCount = $AttributeDeclarationCount - $g_AttributesAssociatedWithElementCount;
print("\n");
print("******************************************\n");
print("**** General DTD Information ****\n");
print("******************************************\n\n");
print("************************************\n");
print("**** DTD File Declared Definitions:\n");
print("************************************\n");
print(" Element Count: $ElementDeclarationCount\n");
print(" Attributes Count: $AttributeDeclarationCount\n");
print(" Entity Count: $EntityCount\n\n");
if ($ErrorStatus != $g_ERROR_DTD_READ){
print("***************************\n");
print("**** DTD Tree Definitions:\n");
print("***************************\n");
print(" Root Element: $g_RootElement \n");
print(" Proclaimed Elements: $g_ProclaimedAssociatedElementCount\n");
print(" Associated Elements: $g_RealizedAssociatedElementNameCount\n");
print(" Orphaned Elements: $OrphanElementCount\n\n");
print(" Associated Attributes: $g_AttributesAssociatedWithElementCount\n");
print(" Orphaned Attributes: $OrphanAttributeCount\n\n");
}#if
SWITCH: {
if ($ErrorStatus eq $g_ERROR_DTD_READ){
print("******************************************************************\n");
print("**** Error: Errors were detected while reading the DTD file ****\n");
print("******************************************************************\n");
foreach $MsgLine (@Msg){
print($MsgLine);
}#foreach
$ReturnStatus = 0;
last SWITCH;
}#if
if ($ErrorStatus eq $g_ERROR_DTD_BUILD){
print("********************************************************************\n");
print("**** Error: Errors were detected while building the DTD tree. ****\n");
print("********************************************************************\n");
foreach $MsgLine (@Msg){
print($MsgLine);
}#foreach
$ReturnStatus = 0;
last SWITCH;
}#if
default:{
if (($OrphanElementCount > 0) || ($OrphanAttributeCount > 0)){
print("****************************************************************************\n");
print("**** Error: The DTD tree was built without all declared definitions. ****\n");
print("**** ****\n");
print("**** Element orphans exist when it or one of its ancestors are ****\n");
print("**** not proclaimed by a parent Element. Note: The number of ****\n");
print("**** Proclaimed and Associated Elements to determine the number ****\n");
print("**** of disassociated trees branches possibly causing multiple ****\n");
print("**** orphans. Attribute orphans exist when its Element is not ****\n");
print("**** declared or a parent Element does not proclaim the ****\n");
print("**** Attribute's Element as its child Element. ****\n");
print("****************************************************************************\n");
if ($OrphanElementCount > 0){ #Orphan
$ReturnStatus = 0;
print("\n");
print("****************************\n");
print("**** Unassociated Elements:\n");
print("****************************\n");
@TmpElementList = @g_ElementDeclarationList;
while (($TmpElementLine = shift(@TmpElementList)) && (!$bStopPrinting)){
($TmpElementLine, $LineNum) = split(/$DTD_LINENUM\s+/,$TmpElementLine);
($TmpElementName, $TmpAttributeName) = split(/\s+/,$TmpElementLine);
$TmpElementName =~ s/^\s+//;
$TmpElementName =~ s/\s+$//;
if (0 == (grep /^$TmpElementName$/, @g_RealizedAssociatedElementNameList)){
$TmpElementLine =~ s/^\s+//;
$TmpElementLine =~ s/\s+$//;
print("Orphan Element ($LineNum): \"$TmpElementLine\"\n");
$PrintCount++;
}#if
if ($PrintCount >= 25 ){
print("\n");
print(" ...\n");
print(" ...\n");
print(" ...\n");
$bStopPrinting = 1;
}#if
}#while
$bStopPrinting = 0;
$PrintCount = 0;
}#if
if ($OrphanAttributeCount > 0){ #Orphan
$ReturnStatus = 0;
@TmpAttributeList = @g_AttributeDeclarationList;
while (($TmpAttributeLine = shift(@TmpAttributeList)) && (!$bStopPrinting)){
($TmpAttributeLine, $LineNum) = split(/$DTD_LINENUM\s+/,$TmpAttributeLine);
($TmpElementName, $TmpAttributeName) = split(/\s+/,$TmpAttributeLine);
$TmpElementName =~ s/^\s+//;
$TmpElementName =~ s/\s+$//;
$TmpAttributeName =~ s/^\s+//;
$TmpAttributeName =~ s/\s+$//;
## Orphan due to its Element was not declaired.
if (0 == (grep /^$TmpElementName$/, @g_ElementNameDeclarationList)){
$TmpAttributeLine =~ s/^$TmpElementName//;
$TmpAttributeLine =~ s/^\s+//;
$TmpAttributeLine =~ s/\s+$//;
$TmpLine = "Orphan Attribute ($LineNum): ($TmpElementName) \"$TmpAttributeLine\"\n";
push @AttributeWithUndeclaredElementList, $TmpLine;
$PrintCount++;
}else{
$bFoundInList = 0;
@SubAssociatedAttributeList = grep /$TmpAttributeName/, @g_AttributesAssociatedWithElements;
foreach $TmpAssociatedAttributeLine (@SubAssociatedAttributeList) {
($TmpAssocElementName,$TmpAssocAttributeName) = split(/\s+/,$TmpAssociatedAttributeLine);
$TmpAssocElementName =~ s/^\s+//;
$TmpAssocElementName =~ s/\s+$//;
$TmpAssocAttributeName =~ s/^\s+//;
$TmpAssocAttributeName =~ s/\s+$//;
if (($TmpElementName eq $TmpAssocElementName) &&
($TmpAttributeName eq $TmpAssocAttributeName)){
$bFoundInList = 1;
}#if
}#foreach
# Orphan due to an ancestor is an orphan.
if ( $bFoundInList == 0){
$TmpAttributeLine =~ s/^$TmpElementName//;
$TmpAttributeLine =~ s/^\s+//;
$TmpAttributeLine =~ s/\s+$//;
$TmpLine = "Orphan Attribute ($LineNum): ($TmpElementName) \"$TmpAttributeLine\"\n";
push @AttributeWithUnAssociatedAncestorList, $TmpLine;
$PrintCount++;
}#if
}#else
if ($PrintCount >= 25 ){$bStopPrinting = 1; }
}#while
if (0 != scalar(@AttributeWithUndeclaredElementList)){
print("\n");
print("***************************************\n");
print("**** Attribute's Element not declared:\n");
print("***************************************\n");
foreach $TmpLine (@AttributeWithUndeclaredElementList){
print($TmpLine);
}#foreach
}#if
if (0 != scalar(@AttributeWithUnAssociatedAncestorList)){
print("\n");
print("*****************************************************\n");
print("**** Attribute's Element or ancestor not proclaimed:\n");
print("*****************************************************\n");
foreach $TmpLine (@AttributeWithUnAssociatedAncestorList){
print($TmpLine);
}#foreach
}#if
if ($bStopPrinting == 1 ){
print("\n");
print(" ...\n");
print(" ...\n");
print(" ...\n");
}#if
$bStopPrinting = 0;
$PrintCount = 0;
}#if
}#if (($OrphanElementCount > 0) || ($OrphanAttributeCount > 0))
last SWITCH;
}#default
}#SWITCH
print("\n");
print("******************************************\n");
print("**** End of General DTD Information ****\n");
print("******************************************\n\n");
if ($bRunTimeDebug){STDERR->print("\nDEBUG: INFO: Exiting function PrintTotalsReport()\n");}
return ($ReturnStatus);
}#ReviewDTDTreeForErrors
############################################################################
# FUNCTION: PrintElementReport()
# PURPOSE: To print an element.
# PARAMETERS: $refCurrentElementNode - [IN] A hash reference containing a CurrentElementNode hash.
#
# RETURN: None.
############################################################################
sub PrintElementReport{
my $refCurrentElementNode = 0;
my %CurrentElementNode = [];
my @CurrentChildNodeList = ();
my @CurrentAttributeList = ();
my $TmpAttribute = "";
($refCurrentElementNode) = @_;
%CurrentElementNode = %{$refCurrentElementNode};
if ($bRunTimeDebug){STDERR->print("\nDEBUG INFO: Entering function PrintElementReport()\n");}
STDERR->print("\n*********************\n");
STDERR->print("Element: $CurrentElementNode{$ELEMENT_NAME}\n");
if (0 != @{$CurrentElementNode{$ref_ATTRIBUTE_LIST}}){
@CurrentAttributeList = @{$CurrentElementNode{$ref_ATTRIBUTE_LIST}};
foreach $TmpAttribute (@CurrentAttributeList) {
STDERR->print("Attributes: $TmpAttribute\n");
}#foreach
}else{
STDERR->print("Attributes: None\n");
}#if
STDERR->print("Category: $CurrentElementNode{$ELEMENT_CATAGORY}\n");
if ($CurrentElementNode{$ELEMENT_CATAGORY} eq $ELEM_CAT_LIST){
@CurrentChildNodeList = @{$CurrentElementNode{$ref_CHILDNODE_LIST}};
PrintFormatList(80, "Child Elements: ", @CurrentChildNodeList);
}#if
STDERR->print("*********************\n");
if ($bRunTimeDebug){STDERR->print("\nDEBUG INFO: Exiting function PrintElementReport()\n");}
}#PrintElementReport()
################################################################################################
########### ###########
########### PDB Section ###########
########### ###########
################################################################################################
############################################################################
# FUNCTION: PDB1LetterSeqs()
# PURPOSE: Reads the PDB file, finds the SEQRES section and returns all the found sequences as 1 letter chains.
# PARAMETERS: None.
#
# RETURN: TRUE for success, otherwise FALSE.
############################################################################
sub PDB1LetterSeqs{
my @Lines = "";
my $ReturnStatus = 1; #Must be True.
my $FileName = "";
my @TmpList = ();
my $Junk = "";
my $Chain = "";
my $TmpChain = "";
my $CurrentLine = "";
my @ChainLine = ();
my @New_ChainLine = ();
my $ChainCount = 0;
my $Converted_Chain = "";
my $TmpElement = "";
my $NewElement = "";
my %Seq3To1 = ('ALA' => 'A', 'ASX' => 'B', 'CYS' => 'C', 'ASP' => 'D',
'GLU' => 'E', 'PHE' => 'F', 'GLY' => 'G', 'HIS' => 'H',
'ILE' => 'I', 'LYS' => 'K', 'LEU' => 'L', 'MET' => 'M',
'ASN' => 'N', 'PRO' => 'P', 'GLN' => 'Q', 'ARG' => 'R',
'SER' => 'S', 'THR' => 'T', 'VAL' => 'V', 'TRP' => 'W',
'XAA' => 'X', 'TYR' => 'Y', 'GLX' => 'Z', 'TER' => '*',
'SEL' => 'U'
);
($FileName) = @_;
if ($bRunTimeDebug){STDERR->print("\nDEBUG INFO: Entering function PDB1LetterSeqs()\n");}
if (open(FILE,"$FileName")){
@Lines=;
close FILE;
@TmpList = grep /^SEQRES/, @Lines;
$CurrentLine = $TmpList[0];
($Junk, $Junk, $TmpChain) = split(/\s+/,$CurrentLine);
foreach $CurrentLine (@TmpList) {
($Junk, $Junk, $Chain) = split(/\s+/,$CurrentLine);
if($TmpChain eq "$Chain"){
($Junk, $Junk, $Junk, $Junk, @ChainLine) = split(/\s+/,$CurrentLine);
$ChainCount = scalar(@ChainLine);
#splice(@ChainLine, ($ChainCount-2));
foreach $TmpElement (@ChainLine){
$NewElement = $Seq3To1{$TmpElement};
push @New_ChainLine, $NewElement;
}#foreach
}#if($TmpChain eq "$Chain")
else{
$Converted_Chain = join '', @New_ChainLine;
STDERR->print("\n\nNew converted $TmpChain Sequence:\n");
STDERR->print("$Converted_Chain\n\n");
$TmpChain = $Chain;
$Converted_Chain = "";
@New_ChainLine = ();
($Junk, $Junk, $Junk, $Junk, @ChainLine) = split(/\s+/,$CurrentLine);
$ChainCount = scalar(@ChainLine);
#splice(@ChainLine, ($ChainCount-2));
foreach $TmpElement (@ChainLine){
$NewElement = $Seq3To1{$TmpElement};
push @New_ChainLine, $NewElement;
}#foreach
}#else
}#foreach
$Converted_Chain = join '', @New_ChainLine;
STDERR->print("\n\nNew converted $TmpChain Sequence:\n");
STDERR->print("$Converted_Chain\n\n");
}#if (open(FILE,"$FileName"))
if ($bRunTimeDebug){STDERR->print("\nDEBUG INFO: Exiting function PDB1LetterSeqs()\n");}
return ($ReturnStatus);
}#PDB1LetterSeqs()
################################################################################################
########### ###########
########### Code Section ###########
########### Tools ###########
################################################################################################
############################################################################
# FUNCTION: GetFileName()
# PURPOSE: To get an input file from the user.
# PARAMETERS: $FileName - [IN] Variable with the file extention type.
# RETURN: Input file to be used.
############################################################################
sub GetFileName{
my @TempFileList = ();
my @ValidResponses = ();
my $FileName = "";
my $FileType = "";
my $FriendlyFileName = "";
($FileType) = @_;
if ($bRunTimeDebug){STDERR->print("\nDEBUG: INFO: Entering function GetFileName()\n");}
SWITCH: {
if ($FileType eq $PDB_EXT){
STDERR->print("\nThe name of a Bioinformatics's PDB input file will be needed.\n");
$FriendlyFileName = "Bioinformatics PDB";
last SWITCH;
}#if
if ($FileType eq $DTD_EXT){
STDERR->print("\nThe name of a Bioinformatics's DTD input file will be needed.\n");
$FriendlyFileName = "Bioinformatics DTD";
last SWITCH;
}#if
default:{
STDERR->print("\nAn internal error has occurred due to processing an unknown\n");
STDERR->print("file extention type: $FileType\n");
STDERR->print("\nPDBParser will exit.\n");
exit(1);
last SWITCH;
}#default:
}#SWITCH
@ValidResponses = ();
STDERR->print("Here is a list of input files in the current directory.\n");
opendir(DIR, cwd());
@TempFileList = grep /\.$FileType$/, readdir DIR;
@TempFileList = sort @TempFileList;
closedir(DIR);
if ( 0 < (scalar(@TempFileList))){
PrintFormatList(80, "$FriendlyFileName ($FileType): ", @TempFileList);
push @ValidResponses, @TempFileList;
}else{
PrintFormatList(80, "$FriendlyFileName ($FileType): ", ("No Files"));
}#else
@TempFileList = ();
STDERR->print("\n");
SWITCH: {
if (3 > (scalar(@ValidResponses))){
$FileName = GetUserResponse("Your response for the input file is invalid.\n", 0, 1, @ValidResponses);
last SWITCH;
}#if
if (3 <= (scalar(@ValidResponses))){
$FileName = GetUserResponse("Your response for the input file is invalid.\n", 0, 0, @ValidResponses);
last SWITCH;
}#if
}#SWITCH
if ($bRunTimeDebug){STDERR->print("\nDEBUG: INFO: Exiting function GetFileName()\n");}
return($FileName);
}#GetFileName()
############################################################################
# FUNCTION: GetUserResponse()
# PURPOSE: To get an interactive response from the user.
# PARAMETERS: $InvalidResponseStatement - [IN] A response statement for an
# invalid user selection.
# $DefaultResponseIndex - [IN] An index into @ValidResponses
# displayed as a default selection.
# $bShowValidResponses - [IN] Displays @ValidResponses on the prompt.
# @ValidResponses - [IN] List of valid user responses.
#
# REMARK: If no valid response is provided, no integrity check will be
# performed and the User response will be returned. If a valid
# response list is provided an integrity check will be performed.
# If a valid response list is provided, the caller may choose to
# have the list displayed on the response prompt. If a valid response
# list is provided and the $DefaultResponseIndex is valid, a
# default Response will be display and the User may press return
# to accept this response. Note: If a $DefaultResponseIndex is
# valid, it will always be displayed, otherwise it is never displayed.
#
# RETURN: The User's response.
############################################################################
sub GetUserResponse{
my $UserResponse = "";
my $InvalidResponseStatement = "";
my @ValidResponses = ();
my $FormattedResponses = "";
my $nValidResponses = 0;
my $DefaultResponseIndex = 0;
my $DefaultResponse = "";
my $bShowValidResponses = 0; # Must be FALSE.
my $bShowResponsesOnPrompt = 0; # Must be FALSE.
my $bHaveValidDefaultResponse = 0; # Must be FALSE.
my $bHaveValidResponses = 0; # Must be FALSE.
if ($bRunTimeDebug){STDERR->print("\nDEBUG INFO: Entering function GetUserResponse()\n");}
($InvalidResponseStatement, $DefaultResponseIndex, $bShowValidResponses, @ValidResponses ) = @_;
$nValidResponses = scalar(@ValidResponses);
if ( 1 <= $nValidResponses){
$bHaveValidResponses = 1;
}#if
if ((0 <= $DefaultResponseIndex) && ($bHaveValidResponses) && ($DefaultResponseIndex <= ($nValidResponses - 1))){
$bHaveValidDefaultResponse = 1;
$DefaultResponse = @ValidResponses[$DefaultResponseIndex];
}#if
if ($bShowValidResponses == 1){
$bShowResponsesOnPrompt = 1;
}#if
if ($bRunTimeDebug){
PrintFormatList(80, "DEBUG INFO: ValidResponses ", @ValidResponses);
STDERR->print("DEBUG INFO:nValidResponses: $nValidResponses\n");
STDERR->print("DEBUG INFO:DefaultResponseIndex: $DefaultResponseIndex\n");
STDERR->print("DEBUG INFO:bShowValidResponses: $bShowValidResponses\n");
STDERR->print("DEBUG INFO:DefaultResponse: $DefaultResponse\n");
}#if
$FormattedResponses = join '|', @ValidResponses;
SWITCH: {
if ($bShowResponsesOnPrompt == 1 && $bHaveValidDefaultResponse == 1){
STDERR->print("Please specify [$FormattedResponses]($DefaultResponse):");
last SWITCH;
}#if
if ($bShowResponsesOnPrompt == 1 && $bHaveValidDefaultResponse == 0){
STDERR->print("Please specify [$FormattedResponses]:");
last SWITCH;
}#if
if ($bShowResponsesOnPrompt == 0 && $bHaveValidDefaultResponse == 1){
STDERR->print("Please specify []($DefaultResponse):");
last SWITCH;
}#if
if ($bShowResponsesOnPrompt == 0 && $bHaveValidDefaultResponse == 0){
STDERR->print("Please specify []():");
last SWITCH;
}#if
}#SWITCH
if (STDIN->eof){
STDERR->print("\nReceived EOF from STDIN, suspecting it is due to CTRL-C/CTRL-D, exiting PDB Parser.\n");
exit;
}#if
$UserResponse = STDIN->getline;
$UserResponse =~ s/\s+$//;
if ($bHaveValidResponses == 1){
while((!(grep /^$UserResponse$/, @ValidResponses)) && (!($UserResponse eq "" && $bHaveValidDefaultResponse))){
PrintFormatList(100, "***ERROR - ", $InvalidResponseStatement);
SWITCH: {
if ($bShowResponsesOnPrompt == 1 && $bHaveValidDefaultResponse == 1){
STDERR->print("Please specify [$FormattedResponses]($DefaultResponse):");
last SWITCH;
}#if
if ($bShowResponsesOnPrompt == 1 && $bHaveValidDefaultResponse == 0){
STDERR->print("Please specify [$FormattedResponses]:");
last SWITCH;
}#if
if ($bShowResponsesOnPrompt == 0 && $bHaveValidDefaultResponse == 1){
STDERR->print("Please specify []($DefaultResponse):");
last SWITCH;
}#if
if ($bShowResponsesOnPrompt == 0 && $bHaveValidDefaultResponse == 0){
STDERR->print("Please specify []():");
last SWITCH;
}#if
}#SWITCH
if (STDIN->eof){
STDERR->print("\nReceived EOF for STDIN, suspecting it is due to CTRL-C/CTRL-D, exiting PDB Parser.\n");
exit;
}#if
$UserResponse = STDIN->getline;
$UserResponse =~ s/\s+$//;
}#while
if ($UserResponse eq "" && $bHaveValidDefaultResponse){
$UserResponse = $DefaultResponse;
}#if
}# ($bHaveValidResponses == 1)
if ($bRunTimeDebug){STDERR->print("\nDEBUG INFO: Exiting function GetUserResponse()\n");}
return($UserResponse);
}#GetUserResponse()
############################################################################
# FUNCTION: PrintFormatList()
# PURPOSE: To print out a list of string tokens (SPACE separated) onto
# multiple lines with a given line length.
# PARAMETERS: $UserMaxLineLenght - [IN] Variable to receive the suggested
# line lenght. This will be reset if
# if greater than $MAX_LINE_LENGTH.
# $UserHeader - [IN] User header for each line printed
# or Empty String for no header.
# @List - [IN] List of tokens to be printed.
# RETURN: None.
############################################################################
sub PrintFormatList{
my $MAX_LINE_LENGTH = 100;
my $UserMaxLineLength = 0;
my $LineHeader = "";
my @List = ();
my $LineHeaderLength = 0;
my $bNewLine = 1;
my $LineLength = 0;
my $Token = "";
my $TokenLength = 0;
($UserMaxLineLength, $LineHeader, @List) = @_;
if ($bRunTimeDebug){STDERR->print("\nDEBUG INFO: Entering function PrintFormatList()\n");}
if ($bRunTimeDebug){STDERR->print("DEBUG INFO: Arg List:($UserMaxLineLength, $LineHeader, @List)\n");}
$LineHeaderLength = length($LineHeader);
if($UserMaxLineLength > $MAX_LINE_LENGTH){$UserMaxLineLength = $MAX_LINE_LENGTH;}
if($LineHeaderLength <= $UserMaxLineLength){
foreach $Token (@List){
$TokenLength = length($Token);
if ($bNewLine == 1){
$LineLength = $LineHeaderLength + $TokenLength;
if($LineLength <= $UserMaxLineLength){
STDERR->print("\n$LineHeader$Token ");
$LineLength = $LineLength + 1;#Add Space
$bNewLine = 0;
}else{
STDERR->print("\n$LineHeader");
$LineLength = $LineHeaderLength;
$bNewLine = 1;
}#else
}else{
$LineLength = $LineLength + $TokenLength;
if( $LineLength <= $UserMaxLineLength){
STDERR->print("$Token ");
$LineLength = $LineLength + 1;#Add Space
}else{
$LineLength = $LineHeaderLength + $TokenLength;
if($LineLength <= $UserMaxLineLength){
STDERR->print("\n$LineHeader$Token ");
$LineLength = $LineLength + 1;#Add Space
$bNewLine = 0;
}else{
STDERR->print("\n$LineHeader");
$LineLength = $LineHeaderLength;
$bNewLine = 1;
}#else
}#else
}#else
}#foreach
STDERR->print("\n");
}#if
if ($bRunTimeDebug){STDERR->print("\nDEBUG INFO: Exiting PrintFormatList()\n");}
}#PrintFormatList()