############################################################################### # # Package: NaturalDocs::SymbolString # ############################################################################### # # A package to manage handling throughout the program. # ############################################################################### # This file is part of Natural Docs, which is Copyright © 2003-2010 Greg Valure # Natural Docs is licensed under version 3 of the GNU Affero General Public License (AGPL) # Refer to License.txt for the complete details use strict; use integer; package NaturalDocs::SymbolString; use Encode qw(encode_utf8 decode_utf8); # # Function: FromText # # Extracts and returns a from plain text. # # This should be the only way to get a from plain text, as the splitting and normalization must be consistent # throughout the application. # sub FromText #(string textSymbol) { my ($self, $textSymbol) = @_; # The internal format of a symbol is all the normalized identifiers separated by 0x1F characters. # Convert whitespace and reserved characters to spaces, and condense multiple consecutive ones. $textSymbol =~ tr/ \t\r\n\x1C\x1D\x1E\x1F/ /s; # DEPENDENCY: ReferenceString->MakeFrom() assumes all 0x1E characters were removed. # DEPENDENCY: ReferenceString->MakeFrom() assumes this encoding doesn't use 0x1E characters. # Remove spaces unless they're separating two alphanumeric/underscore characters. $textSymbol =~ s/^ //; $textSymbol =~ s/ $//; $textSymbol =~ s/(\W) /$1/g; $textSymbol =~ s/ (\W)/$1/g; # Remove trailing empty parenthesis, so Function and Function() are equivalent. $textSymbol =~ s/\(\)$//; # Split the string into pieces. my @pieces = split(/(\.|::|->)/, $textSymbol); my $symbolString; my $lastWasSeparator = 1; foreach my $piece (@pieces) { if ($piece =~ /^(?:\.|::|->)$/) { if (!$lastWasSeparator) { $symbolString .= "\x1F"; $lastWasSeparator = 1; }; } elsif (length $piece) { $symbolString .= $piece; $lastWasSeparator = 0; }; # Ignore empty pieces }; $symbolString =~ s/\x1F$//; return $symbolString; }; # # Function: ToText # # Converts a to text, using the passed separator. # sub ToText #(SymbolString symbolString, string separator) { my ($self, $symbolString, $separator) = @_; my @identifiers = $self->IdentifiersOf($symbolString); return join($separator, @identifiers); }; # # Function: ToBinaryFile # # Writes a to the passed filehandle. Can also encode an undef. # # Parameters: # # fileHandle - The filehandle to write to. # symbol - The to write, or undef. # # Format: # # > [UInt8: number of identifiers] # > [UString16: identifier] [UString16: identifier] ... # # Undef is represented by a zero for the number of identifiers. # sub ToBinaryFile #(FileHandle fileHandle, SymbolString symbol) { my ($self, $fileHandle, $symbol) = @_; my @identifiers; if (defined $symbol) { @identifiers = $self->IdentifiersOf($symbol); }; print $fileHandle pack('C', scalar @identifiers); foreach my $identifier (@identifiers) { my $uIdentifier = encode_utf8($identifier); print $fileHandle pack('na*', length($uIdentifier), $uIdentifier); }; }; # # Function: FromBinaryFile # # Loads a or undef from the filehandle and returns it. # # Parameters: # # fileHandle - The filehandle to read from. # # Returns: # # The or undef. # # See also: # # See for format and dependencies. # sub FromBinaryFile #(FileHandle fileHandle) { my ($self, $fileHandle) = @_; my $raw; # [UInt8: number of identifiers or 0 if none] read($fileHandle, $raw, 1); my $identifierCount = unpack('C', $raw); my @identifiers; while ($identifierCount) { # [UString16: identifier] [UString16: identifier] ... read($fileHandle, $raw, 2); my $identifierLength = unpack('n', $raw); my $identifier; read($fileHandle, $identifier, $identifierLength); $identifier = decode_utf8($identifier); push @identifiers, $identifier; $identifierCount--; }; if (scalar @identifiers) { return $self->Join(@identifiers); } else { return undef; }; }; # # Function: IdentifiersOf # # Returns the as an array of identifiers. # sub IdentifiersOf #(SymbolString symbol) { my ($self, $symbol) = @_; return split(/\x1F/, $symbol); }; # # Function: Join # # Takes a list of identifiers and/or and returns it as a new . # sub Join #(string/SymbolString identifier/symbol, string/SymolString identifier/symbol, ...) { my ($self, @pieces) = @_; # Can't have undefs screwing everything up. while (scalar @pieces && !defined $pieces[0]) { shift @pieces; }; # We need to test @pieces first because joining on an empty array returns an empty string rather than undef. if (scalar @pieces) { return join("\x1F", @pieces); } else { return undef; }; }; 1;