Re: [Templates] Patch: Full unicode support for TT under 5.8
[prev]
[thread]
[next]
[Date index for 2004/07/23]
--h31gzZEtNLTqOjlF
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Mark Fowler wrote:
> Hello List.
Hello Mark.
> Attached is a patch (and a test which tests the patch) that allows
> the Template Toolkit to work properly with Unicode in perl 5.8.
Huzzah! We've been needing this for some time. Many thanks.
I finally got around to looking at it (sorry for the delay), but when
I run the tests, they all fail with the following message:
file error - cache failed to write UTF-XX.ttcache: Undefined subroutine
utf8::is_utf8 called at ../lib/Template/Document.pm line 295
And here's the line:
> + if ($] > 5.007 && utf8::is_utf8($perlcode)) {
According to my "perldoc utf8" (Perl 5.8.0), there is no "is_utf8" function.
It says this:
"Note that in the Perl 5.8.0 implementation the functions utf8::valid,
utf8::encode, utf8::decode, utf8::upgrade, and utf8::downgrade are
always available, without a "require utf8" statement-- this may change
in future releases."
Looking in an online copy of the 5.8.1 docs, it seems that is_utf8 was
only added in 5.8.1, but is functionally equivalent to Encode::is_utf8,
which (I presume because it is in my case) is available with 5.8.0.
So I hacked Document.pm to alias is_utf8() to Encode::is_utf8() or
utf8::is_utf8() accordingly, and the tests now work on my 5.8.0 machine,
and fail gracefully on 5.6.1. I don't have a 5.8.1 installation to hand,
so I'd appreciate it if you could check it out.
I also changed a couple of other minor things that we've previously talked
about in email: a package var, config option, etc.
CVS diffs for Provider.pm and Document.pm are attached for your perusal.
If we're all happy then I'll commit and push a developer release.
Cheers
A
--h31gzZEtNLTqOjlF
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="provider.diff"
Index: lib/Template/Provider.pm
===================================================================
RCS file: /template-toolkit/Template2/lib/Template/Provider.pm,v
retrieving revision 2.80
diff -u -u -r2.80 Provider.pm
--- lib/Template/Provider.pm 2004/01/30 19:32:28 2.80
+++ lib/Template/Provider.pm 2004/07/23 09:47:12
@@ -36,7 +36,7 @@
require 5.004;
use strict;
-use vars qw( $VERSION $DEBUG $ERROR $DOCUMENT $STAT_TTL $MAX_DIRS );
+use vars qw( $VERSION $DEBUG $ERROR $DOCUMENT $STAT_TTL $MAX_DIRS $UNICODE );
use base qw( Template::Base );
use Template::Config;
use Template::Constants;
@@ -64,6 +64,27 @@
$DEBUG = 0 unless defined $DEBUG;
+# UNICODE is supported in versions of Perl from 5.007 onwards
+$UNICODE = $] > 5.007 ? 1 : 0;
+
+my $boms = [
+ 'UTF-8' => "\x{ef}\x{bb}\x{bf}",
+ 'UTF-32BE' => "\x{0}\x{0}\x{fe}\x{ff}",
+ 'UTF-32LE' => "\x{ff}\x{fe}\x{0}\x{0}",
+ 'UTF-16BE' => "\x{fe}\x{ff}",
+ 'UTF-16LE' => "\x{ff}\x{fe}",
+];
+
+# hack so that 'use bytes' will compile on versions of Perl earlier than
+# 5.6, even though we never call _decode_unicode() on those systems
+BEGIN {
+ if ($] < 5.006) {
+ package bytes;
+ $INC{'bytes.pm'} = 1;
+ }
+}
+
+
#========================================================================
# -- PUBLIC METHODS --
#========================================================================
@@ -394,6 +415,10 @@
# $self->{ PREFIX } = $params->{ PREFIX };
$self->{ PARAMS } = $params;
+ # look for user-provided UNICODE parameter or use default from package var
+ $self->{ UNICODE } = defined $params->{ UNICODE }
+ ? $params->{ UNICODE } : $UNICODE;
+
return $self;
}
@@ -628,6 +653,7 @@
elsif (ref $name) {
# ...or a GLOB or file handle...
my $text = <$name>;
+ $text = $self->_decode_unicode($text) if $self->{ UNICODE };
$data = {
name => defined $alias ? $alias : 'input file handle',
text => $text,
@@ -638,6 +664,7 @@
elsif (-f $name) {
if (open(FH, $name)) {
my $text = <FH>;
+ $text = $self->_decode_unicode($text) if $self->{ UNICODE };
$data = {
name => $alias,
path => $name,
@@ -967,7 +994,44 @@
}
}
+
+#------------------------------------------------------------------------
+# _decode_unicode
+#
+# Decodes encoded unicode text that starts with a BOM and
+# turns it into perl's internal representation
+#------------------------------------------------------------------------
+
+
+sub _decode_unicode
+{
+ use bytes;
+
+ my $self = shift;
+ my $string = shift;
+
+ # try all the BOMs in order looking for one (order is important
+ # 32bit BOMs look like 16bit BOMs)
+ my $count = 0;
+ while ($count < @{ $boms }) {
+ my $enc = $boms->[$count++];
+ my $bom = $boms->[$count++];
+
+ # does the string start with the bom?
+ if ($bom eq substr($string, 0, length($bom))) {
+ # decode it and hand it back
+ require Encode;
+ return Encode::decode($enc, substr($string, length($bom)), 1);
+ }
+ }
+
+ # no boms matched so it must be a non unicode string which we return as is
+ return $string;
+}
+
+
1;
+
__END__
--h31gzZEtNLTqOjlF
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="document.diff"
Index: lib/Template/Document.pm
===================================================================
RCS file: /template-toolkit/Template2/lib/Template/Document.pm,v
retrieving revision 2.72
diff -u -u -r2.72 Document.pm
--- lib/Template/Document.pm 2004/01/30 19:32:25 2.72
+++ lib/Template/Document.pm 2004/07/23 10:12:13
@@ -29,13 +29,28 @@
require 5.004;
use strict;
-use vars qw( $VERSION $ERROR $COMPERR $DEBUG $AUTOLOAD );
+use vars qw( $VERSION $ERROR $COMPERR $DEBUG $AUTOLOAD $UNICODE );
use base qw( Template::Base );
use Template::Constants;
$VERSION = sprintf("%d.%02d", q$Revision: 2.72 $ =~ /(\d+)\.(\d+)/);
+BEGIN {
+ # UNICODE is supported in versions of Perl from 5.008 onwards
+ if ($UNICODE = $] > 5.007 ? 1 : 0) {
+ if ($^V gt v5.8.0) {
+ # utf8::is_utf8() available from Perl 5.8.1 onwards
+ *is_utf8 = \&utf8::is_utf8;
+ }
+ elsif ($^V eq v5.8.0) {
+ # use Encode::is_utf8() for Perl 5.8.0
+ require Encode;
+ *is_utf8 = \&Encode::is_utf8;
+ }
+ }
+}
+
#========================================================================
# ----- PUBLIC METHODS -----
#========================================================================
@@ -280,12 +295,18 @@
($fh, $tmpfile) = File::Temp::tempfile(
DIR => File::Basename::dirname($file)
);
- print $fh $class->as_perl($content) || die $!;
- close($fh);
+ my $perlcode = $class->as_perl($content) || die $!;
+
+ if ($UNICODE && is_utf8($perlcode)) {
+ $perlcode = "use utf8;\n\n$perlcode";
+ binmode $fh, ":utf8";
+ }
+ print $fh $perlcode;
+ close($fh);
};
return $class->error($@) if $@;
return rename($tmpfile, $file)
- || $class->error($!);
+ || $class->error($!);
}
--h31gzZEtNLTqOjlF--
_______________________________________________
templates mailing list
templates@xxxxxxxxxxxxxxxx.xxx
http://lists.template-toolkit.org/mailman/listinfo/templates