mirror of
https://github.com/lxsang/antd-lua-plugin
synced 2025-01-07 14:28:22 +01:00
193 lines
5.4 KiB
Perl
193 lines
5.4 KiB
Perl
|
###############################################################################
|
|||
|
#
|
|||
|
# Class: NaturalDocs::LineReader
|
|||
|
#
|
|||
|
###############################################################################
|
|||
|
#
|
|||
|
# An object to handle reading text files line by line in a cross platform manner. Using this class instead of the standard
|
|||
|
# angle brackets approach has the following benefits:
|
|||
|
#
|
|||
|
# - It strips all three types of line breaks automatically: CR/LF (Windows), LF (Unix), and CR (Classic Mac). You do not need to
|
|||
|
# call chomp(). Perl's chomp() fails when parsing Windows-format line breaks on a Unix platform anyway. It leaves the \r on,
|
|||
|
# which screws everything up.
|
|||
|
# - It reads Classic Mac files line by line correctly, whereas the Perl version returns it all as one line.
|
|||
|
# - It abstracts away ignoring the Unicode BOM on the first line, if present.
|
|||
|
#
|
|||
|
###############################################################################
|
|||
|
|
|||
|
# This file is part of Natural Docs, which is Copyright (C) 2003-2010 Greg Valure
|
|||
|
# Natural Docs is licensed under version 3 of the GNU Affero General Public License (AGPL)
|
|||
|
# Refer to License.txt for the complete details
|
|||
|
|
|||
|
use strict;
|
|||
|
use integer;
|
|||
|
|
|||
|
use Encode;
|
|||
|
|
|||
|
|
|||
|
package NaturalDocs::LineReader;
|
|||
|
|
|||
|
#
|
|||
|
# Constants: Members
|
|||
|
#
|
|||
|
# LINEREADER_FILEHANDLE - The file handle being used to read the file. Has the LINEREADER_ prefix to make sure it doesn't
|
|||
|
# conflict with any actual filehandles named FILEHANDLE in the program.
|
|||
|
# CACHED_LINES - An arrayref of lines already read into memory.
|
|||
|
#
|
|||
|
use NaturalDocs::DefineMembers 'LINEREADER_FILEHANDLE',
|
|||
|
'CACHED_LINES';
|
|||
|
|
|||
|
#
|
|||
|
# Function: New
|
|||
|
#
|
|||
|
# Creates and returns a new object.
|
|||
|
#
|
|||
|
# Parameters:
|
|||
|
#
|
|||
|
# filehandle - The file handle being used to read the file.
|
|||
|
#
|
|||
|
sub New #(filehandle)
    {
    # Wraps an already-open file handle.  The handle is switched to raw mode and checked for a UTF-8 or UTF-16 byte order
    # mark.  If one is found the matching encoding layer is applied and reading continues right after the BOM.  If none is
    # found, the start of the file is test-decoded as strict UTF-8: on success the handle gets ':crlf:encoding(UTF-8)', on
    # failure it gets plain ':crlf', and in both cases it is rewound to the beginning.
    #
    # Returns:
    #
    #    The new object, blessed into the package New() was called on.

    my ($selfPackage, $filehandle) = @_;

    my $object = [ ];

    $object->[LINEREADER_FILEHANDLE] = $filehandle;
    $object->[CACHED_LINES] = [ ];

    binmode($filehandle, ':raw');

    my $hasBOM = 0;
    my $possibleBOM = undef;
    read($filehandle, $possibleBOM, 2);

    if ($possibleBOM eq "\xEF\xBB")
        {
        # The UTF-8 BOM is three bytes long, so the last byte has to be confirmed separately.
        read($filehandle, $possibleBOM, 1);
        if ($possibleBOM eq "\xBF")
            {
            binmode($filehandle, ':crlf:encoding(UTF-8)');  # Strict UTF-8, not Perl's lax version.
            $hasBOM = 1;
            }
        }
    elsif ($possibleBOM eq "\xFE\xFF")
        {
        binmode($filehandle, ':crlf:encoding(UTF-16BE)');
        $hasBOM = 1;
        }
    elsif ($possibleBOM eq "\xFF\xFE")
        {
        binmode($filehandle, ':crlf:encoding(UTF-16LE)');
        $hasBOM = 1;
        }

    if (!$hasBOM)
        {
        seek($filehandle, 0, 0);

        my $rawData = undef;
        my $readLength = -s $filehandle;
        my $sampleIsTruncated = 0;

        # Since we're only reading the data to determine if it's UTF-8, sanity check the file length.  We may run
        # across a huge extensionless system file and we don't want to load the whole thing.  Half a meg should
        # be good enough to encompass giant source files while not bogging things down on system files.
        if ($readLength > 512 * 1024)
            {
            $readLength = 512 * 1024;
            $sampleIsTruncated = 1;
            }

        read($filehandle, $rawData, $readLength);

        # When the sample was capped, the cut may have landed in the middle of a multi-byte character.  Drop a trailing
        # lead byte that is missing some of its continuation bytes so that a valid UTF-8 file isn't rejected just because
        # of where the sample ends.  Complete trailing sequences are left untouched.
        if ($sampleIsTruncated && defined $rawData)
            {
            $rawData =~ s/(?:[\xC2-\xDF]|[\xE0-\xEF][\x80-\xBF]?|[\xF0-\xF4][\x80-\xBF]{0,2})\z//;
            }

        eval
            {  $rawData = Encode::decode("UTF-8", $rawData, Encode::FB_CROAK);  };

        # Check $@ directly rather than $::EVAL_ERROR, which only mirrors it when package main has loaded English.
        if ($@)
            {  binmode($filehandle, ':crlf');  }
        else
            {
            # Theoretically, since this is valid UTF-8 data we should be able to split it on line breaks and feed them into
            # CACHED_LINES instead of setting the encoding to UTF-8 and seeking back to zero just to read it all again.
            # Alas, this doesn't work, for a reason that wasn't easy to identify, so we just let the file cache absorb the
            # hit instead.  If we were ever to do this in the future you'd have to handle the file length capping code
            # above too.
            binmode($filehandle, ':crlf:encoding(UTF-8)');
            }

        seek($filehandle, 0, 0);
        }

    bless $object, $selfPackage;
    return $object;
    };
|
|||
|
|
|||
|
|
|||
|
#
|
|||
|
# Function: Chomp
|
|||
|
#
|
|||
|
# Removes any line breaks from the end of a value. It does not remove any that are in the middle of it.
|
|||
|
#
|
|||
|
# Parameters:
|
|||
|
#
|
|||
|
# lineRef - A *reference* to the line to chomp.
|
|||
|
#
|
|||
|
sub Chomp #(lineRef)
    {
    # Strips a single trailing line break (CR/LF, CR, or LF) from the string that lineRef points to.  The string is
    # edited in place; line breaks elsewhere in it are not touched.
    my $self = shift;
    my $lineRef = shift;

    ${$lineRef} =~ s/(?:\r\n|\r|\n)$//;
    };
|
|||
|
|
|||
|
|
|||
|
#
|
|||
|
# Function: Get
|
|||
|
#
|
|||
|
# Returns the next line of text from the file, or undef if there are no more. The line break will be removed automatically. If
|
|||
|
# the first line contains a Unicode BOM, that will also be removed automatically.
|
|||
|
#
|
|||
|
sub Get
    {
    # Hands out the next line without its line break, or undef once the file is exhausted.  Lines already split off
    # by an earlier call are served from CACHED_LINES before the file handle is read again.
    my $self = shift;

    my $cachedLines = $self->[CACHED_LINES];

    if (scalar @$cachedLines)
        {  return shift @$cachedLines;  }

    my $filehandle = $self->[LINEREADER_FILEHANDLE];
    my $rawLine = <$filehandle>;

    if (!defined $rawLine)
        {  return undef;  }

    $self->Chomp(\$rawLine);

    if ($rawLine !~ /\r/)
        {  return $rawLine;  }

    # Split for Classic Mac.  A CR left over after chomping means this "line" is really several CR-separated lines.
    # The -1 limit keeps trailing empty fields.  Without it, blank lines at the end of the chunk are dropped, and a chunk
    # made up only of blank lines splits into nothing, which made Get() return undef as if the file had ended.
    push @$cachedLines, split(/\r/, $rawLine, -1);

    return shift @$cachedLines;
    }
|
|||
|
|
|||
|
|
|||
|
#
|
|||
|
# Function: GetAll
|
|||
|
#
|
|||
|
# Returns an array of all the lines from the file. The line breaks will be removed automatically. If the first line contains a
|
|||
|
# Unicode BOM, that will also be removed automatically.
|
|||
|
#
|
|||
|
sub GetAll
    {
    # Returns every line that has not been handed out yet, with line breaks removed.  In scalar context the result is
    # the number of those lines.
    my $self = shift;

    my $filehandle = $self->[LINEREADER_FILEHANDLE];

    # An earlier Get() may have read ahead and parked lines in the cache.  They have already been consumed from the file
    # handle, so they must be returned here first or they would be lost.  The cache is emptied so they aren't served twice.
    my @lines = @{$self->[CACHED_LINES]};
    @{$self->[CACHED_LINES]} = ( );

    # The file's byte size is an upper bound on what is left to read, so a single read() picks up the rest.
    my $rawContent;
    read($filehandle, $rawContent, -s $filehandle);

    push @lines, split(/\r\n|\n|\r/, $rawContent);

    return @lines;
    }
|
|||
|
|
|||
|
1;
|