/[MIME-Charset]/tags/MIME-Charset-0.04.1/Charset.pm
ViewVC logotype

Contents of /tags/MIME-Charset-0.04.1/Charset.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 9 - (show annotations) (download)
Sun Oct 22 06:04:30 2006 UTC (3 years, 10 months ago) by hatukanezumi
File size: 20748 byte(s)
Release 0.04.1
1
2 package MIME::Charset;
3 use 5.005;
4
5 =head1 NAME
6
7 MIME::Charset - Charset Informations for MIME
8
9 =head1 SYNOPSIS
10
11 Getting charset informations:
12
13 use MIME::Charset qw(:info);
14
15 $benc = body_encoding("iso-8859-2"); # "Q"
16 $cset = canonical_charset("ANSI X3.4-1968"); # "US-ASCII"
17 $henc = header_encoding("utf-8"); # "S"
18 $cset = output_charset("shift_jis"); # "ISO-2022-JP"
19
20 Translating text data:
21
22 use MIME::Charset qw(:trans);
23
24 ($text, $charset, $encoding) =
25 header_encode(
26 "\xc9\xc2\xc5\xaa\xc0\xde\xc3\xef\xc5\xaa".
27 "\xc7\xd1\xca\xaa\xbd\xd0\xce\xcf\xb4\xef",
28 "euc-jp");
29 # ...returns (<converted>, "ISO-2022-JP", "B");
30
31 ($text, $charset, $encoding) =
32 body_encode(
33 "Collectioneur path\xe9tiquement ".
34 "\xe9clectique de d\xe9chets",
35 "latin1");
36 # ...returns (<original>, "ISO-8859-1", "QUOTED-PRINTABLE");
37
38 $len = encoded_header_len(
39 "Perl\xe8\xa8\x80\xe8\xaa\x9e", "b", "utf-8"); # 28
40
41 Manipulating module defaults:
42
43 use MIME::Charset;
44
45 MIME::Charset::alias("csEUCKR", "euc-kr");
46 MIME::Charset::default("iso-8859-1");
47 MIME::Charset::fallback("us-ascii");
48
49 =head1 DESCRIPTION
50
51 MIME::Charset provides informations about character sets used for
52 MIME messages on Internet.
53
54 =head2 DEFINITIONS
55
56 The B<charset> is ``character set'' used in MIME to refer to a
57 method of converting a sequence of octets into a sequence of characters.
58 It includes both concepts of ``coded character set'' (CCS) and
59 ``character encoding scheme'' (CES) of ISO/IEC.
60
61 The B<encoding> is that used in MIME to refer to a method of representing
62 a body part or a header body as sequence(s) of printable US-ASCII
63 characters.
64
65 =over 4
66
67 =cut
68
69 use strict;
70 use vars qw(@ISA $VERSION @EXPORT @EXPORT_OK %EXPORT_TAGS);
71 use Exporter;
72 @ISA = qw(Exporter);
73 @EXPORT = qw(body_encoding canonical_charset header_encoding output_charset
74 body_encode encoded_header_len header_encode);
75 @EXPORT_OK = qw(alias default fallback recommended);
76 %EXPORT_TAGS = (
77 "info" => [qw(body_encoding header_encoding
78 canonical_charset output_charset)],
79 "trans" =>[ qw(body_encode encoded_header_len
80 header_encode)],
81 );
82 use Carp qw(croak);
83
84 use constant USE_ENCODE => ($] >= 5.008001)? 'Encode': '';
85
86 my @ENCODE_SUBS = qw(FB_CROAK FB_PERLQQ FB_HTMLCREF FB_XMLCREF
87 decode encode from_to is_utf8 resolve_alias);
88 if (USE_ENCODE) {
89 eval "use ".USE_ENCODE." \@ENCODE_SUBS;";
90 } else {
91 require MIME::Charset::_Compat;
92 for my $sub (@ENCODE_SUBS) {
93 no strict "refs";
94 *{$sub} = \&{"MIME::Charset::_Compat::$sub"};
95 }
96 }
97
98 $VERSION = '0.04.1';
99
100 ######## Private Attributes ########
101
102 my $DEFAULT_CHARSET = 'US-ASCII';
103 my $FALLBACK_CHARSET = 'UTF-8';
104
105 # This table was borrwed from Python email package.
106
107 my %CHARSETS = (# input header enc body enc output conv
108 'ISO-8859-1' => ['Q', 'Q', undef],
109 'ISO-8859-2' => ['Q', 'Q', undef],
110 'ISO-8859-3' => ['Q', 'Q', undef],
111 'ISO-8859-4' => ['Q', 'Q', undef],
112 # ISO-8859-5 is Cyrillic, and not especially used
113 # ISO-8859-6 is Arabic, also not particularly used
114 # ISO-8859-7 is Greek, 'Q' will not make it readable
115 # ISO-8859-8 is Hebrew, 'Q' will not make it readable
116 'ISO-8859-9' => ['Q', 'Q', undef],
117 'ISO-8859-10' => ['Q', 'Q', undef],
118 # ISO-8859-11 is Thai, 'Q' will not make it readable
119 'ISO-8859-13' => ['Q', 'Q', undef],
120 'ISO-8859-14' => ['Q', 'Q', undef],
121 'ISO-8859-15' => ['Q', 'Q', undef],
122 'WINDOWS-1252' => ['Q', 'Q', undef],
123 'VISCII' => ['Q', 'Q', undef],
124 'US-ASCII' => [undef, undef, undef],
125 'BIG5' => ['B', 'B', undef],
126 'GB2312' => ['B', 'B', undef],
127 'EUC-JP' => ['B', undef, 'ISO-2022-JP'],
128 'SHIFT_JIS' => ['B', undef, 'ISO-2022-JP'],
129 'ISO-2022-JP' => ['B', undef, undef],
130 'KOI8-R' => ['B', 'B', undef],
131 'UTF-8' => ['S', 'B', undef],
132 # We're making this one up to represent raw unencoded 8bit
133 '8BIT' => [undef, 'B', 'ISO-8859-1'],
134 );
135
136 # Fix some unexpected or unpreferred names returned by
137 # Encode::resolve_alias() or used by somebodies else.
138 my %CHARSET_ALIASES = (# unpreferred preferred
139 "ASCII" => "US-ASCII",
140 "BIG5-ETEN" => "BIG5",
141 "CP1251" => "WINDOWS-1251",
142 "CP1252" => "WINDOWS-1252",
143 "CP936" => "GBK",
144 "CP949" => "KS_C_5601-1987",
145 "EUC-CN" => "GB2312",
146 "KS_C_5601" => "KS_C_5601-1987",
147 "SHIFTJIS" => "SHIFT_JIS",
148 "SHIFTJISX0213" => "SHIFT_JISX0213",
149 "UNICODE-1-1-UTF-7" => "UTF-7",
150 "UTF8" => "UTF-8",
151 );
152
153 # ISO-2022-* escape sequnces to detect charset from unencoded data.
154 my @ISO2022_SEQ = (# escape seq possible charset
155 # Following sequences are commonly used.
156 ["\033\$\@", "ISO-2022-JP"], # RFC 1468
157 ["\033\$B", "ISO-2022-JP"], # ditto
158 ["\033(J", "ISO-2022-JP"], # ditto
159 ["\033(I", "ISO-2022-JP"], # ditto (nonstandard)
160 ["\033\$(D", "ISO-2022-JP"], # RFC 2237 (note*)
161 # Folloing sequences are less commonly used.
162 ["\033\$)C", "ISO-2022-KR"], # RFC 1557
163 ["\033\$)A", "ISO-2022-CN"], # RFC 1922
164 ["\033\$A", "ISO-2022-CN"], # ditto (nonstandard)
165 ["\033\$)G", "ISO-2022-CN"], # ditto
166 ["\033\$*H", "ISO-2022-CN"], # ditto
167 # Other sequences will be used with appropriate charset
168 # parameters, or hardly used.
169 );
170
171 # note*: This RFC defines ISO-2022-JP-1, superset of
172 # ISO-2022-JP. But that charset name is rarely used.
173 # OTOH many of codecs for ISO-2022-JP recognize this
174 # sequence so that comatibility with EUC-JP will be
175 # guaranteed.
176
177 ######## Private Constants ########
178
179 my $NONASCIIRE = qr{
180 [^\x01-\x7e]
181 }x;
182
183 my $ISO2022RE = qr{
184 ^ISO-2022-
185 }ix;
186
187
188 ######## Public Functions ########
189
190 =head2 GETTING INFORMATIONS OF CHARSETS
191
192 =item body_encoding CHARSET
193
194 Get recommended transfer-encoding of CHARSET for message body.
195
196 Returned value will be one of C<"B"> (BASE64), C<"Q"> (QUOTED-PRINTABLE) or
197 C<undef> (might not be transfer-encoded; either 7BIT or 8BIT). This may
198 not be same as encoding for message header.
199
200 =cut
201
202 sub body_encoding($) {
203 my $charset = shift;
204 return undef unless $charset;
205 return (&recommended($charset))[1];
206 }
207
208 =item canonical_charset CHARSET
209
210 Get canonical name for charset CHARSET.
211
212 =cut
213
214 sub canonical_charset($) {
215 my $charset = shift;
216 return undef unless $charset;
217 my $cset = resolve_alias($charset) || $charset;
218 return $CHARSET_ALIASES{uc($cset)} || uc($cset);
219 }
220
221 =item header_encoding CHARSET
222
223 Get recommended encoding scheme of CHARSET for message header.
224
225 Returned value will be one of C<"B">, C<"Q">, C<"S"> (shorter one of either)
226 or C<undef> (might not be encoded). This may not be same as encoding
227 for message body.
228
229 =cut
230
231 sub header_encoding($) {
232 my $charset = shift;
233 return undef unless $charset;
234 return (&recommended($charset))[0];
235 }
236
237 =item output_charset CHARSET
238
239 Get a charset which is compatible with given CHARSET and is recommended
240 to be used for MIME messages on Internet (if it is known by this module).
241
242 When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
243 this function will simply
244 return the result of L<"canonical_charset">.
245
246 =cut
247
248 sub output_charset($) {
249 my $charset = shift;
250 return undef unless $charset;
251 return (&recommended($charset))[2] || uc($charset);
252 }
253
254 =head2 TRANSLATING TEXT DATA
255
256 =item body_encode STRING, CHARSET [, OPTS]
257
258 Get converted (if needed) data of STRING and recommended transfer-encoding
259 of that data for message body. CHARSET is the charset by which STRING
260 is encoded.
261
262 OPTS may accept following key-value pairs.
263 B<NOTE>:
264 When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
265 conversion will not be performed. So these options do not have any effects.
266
267 =over 4
268
269 =item Replacement => REPLACEMENT
270
271 Specifies error handling scheme. See L<"ERROR HANDLING">.
272
273 =item Detect7bit => YESNO
274
275 Try auto-detecting 7-bit charset when CHARSET is not given.
276 Default is C<"YES">.
277
278 =back
279
280 3-item list of (I<converted string>, I<charset for output>,
281 I<transfer-encoding>) will be returned.
282 I<Transfer-encoding> will be either C<"BASE64">, C<"QUOTED-PRINTABLE">,
283 C<"7BIT"> or C<"8BIT">. If I<charset for output> could not be determined
284 and I<converted string> contains non-ASCII byte(s), I<charset for output> will
285 be C<undef> and I<transfer-encoding> will be C<"BASE64">.
286 I<Charset for output> will be C<"US-ASCII"> if and only if string does not
287 contain any non-ASCII bytes.
288
289 =cut
290
291 sub body_encode {
292 my ($encoded, $charset, $cset) = &_text_encode(@_);
293
294 # Determine transfer-encoding.
295 my $enc;
296 my $dummy = $encoded;
297 eval {
298 from_to($dummy, $cset, "US-ASCII", FB_CROAK());
299 };
300 if (!$@ and $dummy eq $encoded) {
301 $cset = "US-ASCII";
302 $enc = undef;
303 } else {
304 $@ = '';
305 $enc = &body_encoding($charset);
306 }
307
308 if (!$enc and $encoded !~ /\x00/) { # Eliminate hostile NUL character.
309 if ($encoded =~ $NONASCIIRE) { # String contains 8bit char(s).
310 $enc = '8BIT';
311 } elsif ($cset =~ $ISO2022RE) { # ISO-2022-* outputs are 7BIT.
312 $enc = '7BIT';
313 } else { # Pure ASCII.
314 $enc = '7BIT';
315 $cset = 'US-ASCII';
316 }
317 } elsif ($enc eq 'B') {
318 $enc = 'BASE64';
319 } elsif ($enc eq 'Q') {
320 $enc = 'QUOTED-PRINTABLE';
321 } else {
322 $enc = 'BASE64';
323 }
324 return ($encoded, $cset, $enc);
325 }
326
327 =item encoded_header_len STRING, ENCODING, CHARSET
328
329 Get length of encoded STRING for message header
330 (without folding).
331
332 ENCODING may be one of C<"B">, C<"Q"> or C<"S"> (shorter
333 one of either C<"B"> or C<"Q">).
334
335 =cut
336
337 sub encoded_header_len($$$) {
338 my $s = shift;
339 my $encoding = uc(shift);
340 my $charset = shift;
341
342 my $enclen;
343 if ($encoding eq 'Q') {
344 $enclen = _enclen_Q($s);
345 } elsif ($encoding eq "S") {
346 my ($b, $q) = (_enclen_B($s), _enclen_Q($s));
347 $enclen = ($b < $q)? $b: $q;
348 } else { # "B"
349 $enclen = _enclen_B($s);
350 }
351
352 length($charset)+$enclen+7;
353 }
354
355 sub _enclen_B($) {
356 int((length(shift) + 2) / 3) * 4;
357 }
358
359 sub _enclen_Q($) {
360 my $s = shift;
361 my @o;
362 @o = ($s =~ /(\?|=|_|[^ \x21-\x7e])/gos);
363 length($s) + scalar(@o) * 2;
364 }
365
366 =item header_encode STRING, CHARSET [, OPTS]
367
368 Get converted (if needed) data of STRING and recommended encoding scheme of
369 that data for message headers. CHARSET is the charset by which STRING
370 is encoded.
371
372 OPTS may accept following key-value pairs.
373 B<NOTE>:
374 When Unicode/multibyte support is disabled (see L<"USE_ENCODE">),
375 conversion will not be performed. So these options do not have any effects.
376
377 =over 4
378
379 =item Replacement => REPLACEMENT
380
381 Specifies error handling scheme. See L<"ERROR HANDLING">.
382
383 =item Detect7bit => YESNO
384
385 Try auto-detecting 7-bit charset when CHARSET is not given.
386 Default is C<"YES">.
387
388 =back
389
390 3-item list of (I<converted string>, I<charset for output>,
391 I<encoding scheme>) will be returned. I<Encoding scheme> will be
392 either C<"B">, C<"Q"> or C<undef> (might not be encoded).
393 If I<charset for output> could not be determined and I<converted string>
394 contains non-ASCII byte(s), I<charset for output> will be C<"8BIT">
395 (this is I<not> charset name but a special value to represent unencodable
396 data) and I<encoding scheme> will be C<undef> (should not be encoded).
397 I<Charset for output> will be C<"US-ASCII"> if and only if string does not
398 contain any non-ASCII bytes.
399
400 =back
401
402 =cut
403
404 sub header_encode {
405 my ($encoded, $charset, $cset) = &_text_encode(@_);
406 return ($encoded, '8BIT', undef) unless $cset;
407
408 # Determine encoding scheme.
409 my $enc;
410 my $dummy = $encoded;
411 eval {
412 from_to($dummy, $cset, "US-ASCII", FB_CROAK());
413 };
414 if (!$@ and $dummy eq $encoded) {
415 $cset = "US-ASCII";
416 $enc = undef;
417 } else {
418 $@ = '';
419 $enc = &header_encoding($charset);
420 }
421
422 if (!$enc and $encoded !~ $NONASCIIRE) {
423 unless ($cset =~ $ISO2022RE) { # ISO-2022-* outputs are 7BIT.
424 $cset = 'US-ASCII';
425 }
426 } elsif ($enc eq 'S') {
427 if (&encoded_header_len($encoded, "B", $cset) <
428 &encoded_header_len($encoded, "Q", $cset)) {
429 $enc = 'B';
430 } else {
431 $enc = 'Q';
432 }
433 } elsif ($enc !~ /^[BQ]$/) {
434 $enc = 'B';
435 }
436 return ($encoded, $cset, $enc);
437 }
438
439 sub _text_encode {
440 my $s = shift;
441 my $charset = &canonical_charset(shift);
442 my %params = @_;
443 my $replacement = uc($params{'Replacement'}) || "DEFAULT";
444 my $detect7bit = uc($params{'Detect7bit'}) || "YES";
445
446 if (!$charset) {
447 if ($s =~ $NONASCIIRE) {
448 return ($s, undef, undef);
449 } elsif ($detect7bit ne "NO") {
450 $charset = &_detect_7bit_charset($s);
451 } else {
452 $charset = $DEFAULT_CHARSET;
453 }
454 }
455
456 # Unknown charset.
457 return ($s, $charset, $charset)
458 unless resolve_alias($charset);
459
460 # Encode data by output charset if required. If failed, fallback to
461 # fallback charset.
462 my $cset = &output_charset($charset);
463 my $encoded;
464
465 if (is_utf8($s) or $s =~ /[^\x00-\xFF]/) {
466 if ($replacement =~ /^(?:CROAK|STRICT|FALLBACK)$/) {
467 eval {
468 $encoded = $s;
469 $encoded = encode($cset, $encoded, FB_CROAK());
470 };
471 if ($@) {
472 if ($replacement eq "FALLBACK" and $FALLBACK_CHARSET) {
473 $cset = $FALLBACK_CHARSET;
474 $encoded = $s;
475 $encoded = encode($cset, $encoded);
476 $charset = $cset;
477 } else {
478 croak $@;
479 }
480 }
481 } elsif ($replacement eq "PERLQQ") {
482 $encoded = encode($cset, $s, FB_PERLQQ());
483 } elsif ($replacement eq "HTMLCREF") {
484 $encoded = encode($cset, $s, FB_HTMLCREF());
485 } elsif ($replacement eq "XMLCREF") {
486 $encoded = encode($cset, $s, FB_XMLCREF());
487 } else {
488 $encoded = encode($cset, $s);
489 }
490 } elsif ($charset ne $cset) {
491 $encoded = $s;
492 if ($replacement =~ /^(?:CROAK|STRICT|FALLBACK)$/) {
493 eval {
494 from_to($encoded, $charset, $cset, FB_CROAK());
495 };
496 if ($@) {
497 if ($replacement eq "FALLBACK" and $FALLBACK_CHARSET) {
498 $cset = $FALLBACK_CHARSET;
499 $encoded = $s;
500 from_to($encoded, $charset, $cset);
501 $charset = $cset;
502 } else {
503 croak $@;
504 }
505 }
506 } elsif ($replacement eq "PERLQQ") {
507 from_to($encoded, $charset, $cset, FB_PERLQQ());
508 } elsif ($replacement eq "HTMLCREF") {
509 from_to($encoded, $charset, $cset, FB_HTMLCREF());
510 } elsif ($replacement eq "XMLCREF") {
511 from_to($encoded, $charset, $cset, FB_XMLCREF());
512 } else {
513 from_to($encoded, $charset, $cset);
514 }
515 } else {
516 $encoded = $s;
517 }
518
519 return ($encoded, $charset, $cset);
520 }
521
522 sub _detect_7bit_charset($) {
523 return $DEFAULT_CHARSET unless USE_ENCODE;
524 my $s = shift;
525 return $DEFAULT_CHARSET unless $s;
526
527 # Try to detect ISO-2022-* escape sequences.
528 foreach (@ISO2022_SEQ) {
529 my ($seq, $cset) = @$_;
530 if (index($s, $seq) >= 0) {
531 eval {
532 my $dummy = $s;
533 decode($cset, $dummy, FB_CROAK());
534 };
535 if ($@) {
536 next;
537 }
538 return $cset;
539 }
540 }
541
542 # How about HZ, VIQR, ...?
543
544 return $DEFAULT_CHARSET;
545 }
546
547 =head2 MANIPULATING MODULE DEFAULTS
548
549 =over 4
550
551 =item alias ALIAS [, CHARSET]
552
553 Get/set charset alias for canonical names determined by
554 L<"canonical_charset">.
555
556 If CHARSET is given and isn't false, ALIAS will be assigned as an alias of
557 CHARSET. Otherwise, alias won't be changed. In both cases,
558 current charset name that ALIAS is assigned will be returned.
559
560 =cut
561
562 sub alias ($;$) {
563 my $alias = uc(shift);
564 my $charset = uc(shift);
565
566 return $CHARSET_ALIASES{$alias} unless $charset;
567
568 $CHARSET_ALIASES{$alias} = $charset;
569 return $charset;
570 }
571
572 =item default [CHARSET]
573
574 Get/set default charset.
575
576 B<Default charset> is used by this module when charset context is
577 unknown. Modules using this module are recommended to use this
578 charset when charset context is unknown or implicit default is
579 expected. By default, it is C<"US-ASCII">.
580
581 If CHARSET is given and isn't false, it will be set to default charset.
582 Otherwise, default charset won't be changed. In both cases,
583 current default charset will be returned.
584
585 B<NOTE>: Default charset I<should not> be changed.
586
587 =cut
588
589 sub default(;$) {
590 my $charset = &canonical_charset(shift);
591
592 if ($charset) {
593 croak "Unknown charset '$charset'"
594 unless resolve_alias($charset);
595 $DEFAULT_CHARSET = $charset;
596 }
597 return $DEFAULT_CHARSET;
598 }
599
600 =item fallback [CHARSET]
601
602 Get/set fallback charset.
603
604 B<Fallback charset> is used by this module when conversion by given
605 charset is failed and C<"FALLBACK"> error handling scheme is specified.
606 Modules using this module may use this charset as last resort of charset
607 for conversion. By default, it is C<"UTF-8">.
608
609 If CHARSET is given and isn't false, it will be set to fallback charset.
610 If CHARSET is C<"NONE">, fallback charset will be undefined.
611 Otherwise, fallback charset won't be changed. In any cases,
612 current fallback charset will be returned.
613
614 B<NOTE>: It I<is> useful that C<"US-ASCII"> is specified as fallback charset,
615 since result of conversion will be readable without charset informations.
616
617 =cut
618
619 sub fallback(;$) {
620 my $charset = &canonical_charset(shift);
621
622 if ($charset eq "NONE") {
623 $FALLBACK_CHARSET = undef;
624 } elsif ($charset) {
625 croak "Unknown charset '$charset'"
626 unless resolve_alias($charset);
627 $FALLBACK_CHARSET = $charset;
628 }
629 return $FALLBACK_CHARSET;
630 }
631
632 =item recommended CHARSET [, HEADERENC, BODYENC [, ENCCHARSET]]
633
634 Get/set charset profiles.
635
636 If optional arguments are given and any of them are not false, profiles
637 for CHARSET will be set by those arguments. Otherwise, profiles
638 won't be changed. In both cases, current profiles for CHARSET will be
639 returned as 3-item list of (HEADERENC, BODYENC, ENCCHARSET).
640
641 HEADERENC is recommended encoding scheme for message header.
642 It may be one of C<"B">, C<"Q">, C<"S"> (shorter one of either) or
643 C<undef> (might not be encoded).
644
645 BODYENC is recommended transfer-encoding for message body. It may be
646 one of C<"B">, C<"Q"> or C<undef> (might not be transfer-encoded).
647
648 ENCCHARSET is a charset which is compatible with given CHARSET and
649 is recommended to be used for MIME messages on Internet.
650 If conversion is not needed (or this module doesn't know appropriate
651 charset), ENCCHARSET is C<undef>.
652
653 B<NOTE>: This function in the future releases can accept more optional
654 arguments (for example, properties to handle character widths, line folding
655 behavior, ...). So format of returned value may probably be changed.
656 Use L<"header_encoding">, L<"body_encoding"> or L<"output_charset"> to get
657 particular profile.
658
659 =cut
660
661 sub recommended ($;$;$;$) {
662 my $charset = &canonical_charset(shift);
663 my $henc = uc(shift) || undef;
664 my $benc = uc(shift) || undef;
665 my $cset = &canonical_charset(shift);
666
667 croak "CHARSET is not specified" unless $charset;
668 croak "Unknown header encoding" unless !$henc or $henc =~ /^[BQS]$/;
669 croak "Unknown body encoding" unless !$benc or $benc =~ /^[BQ]$/;
670
671 if ($henc or $benc or $cset) {
672 $cset = undef if $charset eq $cset;
673 my @spec = ($henc, $benc, USE_ENCODE? $cset: undef);
674 $CHARSETS{$charset} = \@spec;
675 return @spec;
676 } else {
677 my $spec = $CHARSETS{$charset};
678 if ($spec) {
679 return ($$spec[0], $$spec[1], USE_ENCODE? $$spec[2]: undef);
680 } else {
681 return ('S', 'B', undef);
682 }
683 }
684 }
685
686 =head2 CONSTANTS
687
688 =item USE_ENCODE
689
690 Unicode/multibyte support flag.
691 Non-null string will be set when Unicode and multibyte support is enabled.
692 Currently, this flag will be non-null on Perl 5.8.1 or later and
693 null string on earlier versions of Perl.
694
695 =head2 ERROR HANDLING
696
697 L<"body_encode"> and L<"header_encode"> accept following C<Replacement>
698 options:
699
700 =item C<"DEFAULT">
701
702 Put a substitution character in place of a malformed character.
703 For UCM-based encodings, <subchar> will be used.
704
705 =item C<"FALLBACK">
706
707 Try C<"DEFAULT"> scheme using I<fallback charset> (see L<"fallback">).
708 When fallback charset is undefined and conversion causes error,
709 code will die on error with an error message.
710
711 =item C<"CROAK">
712
713 Code will die on error immediately with an error message.
714 Therefore, you should trap the fatal error with eval{} unless you
715 really want to let it die on error.
716 Synonym is C<"STRICT">.
717
718 =item C<"PERQQ">
719
720 =item C<"HTMLCREF">
721
722 =item C<"XMLCREF">
723
724 Use C<FB_PERLQQ>, C<FB_HTMLCREF> or C<FB_XMLCREF>
725 scheme defined by L<Encode> module.
726
727 =back
728
729 If error handling scheme is not specified or unknown scheme is specified,
730 C<"DEFAULT"> will be assumed.
731
732 =head1 SEE ALSO
733
734 Multipurpose Internet Mail Extensions (MIME).
735
736 =head1 AUTHORS
737
738 Copyright (C) 2006 Hatuka*nezumi - IKEDA Soji <hatuka(at)nezumi.nu>.
739
740 All rights reserved. This program is free software; you can redistribute
741 it and/or modify it under the same terms as Perl itself.
742
743 =cut
744
745 1;

  ViewVC Help
Powered by ViewVC 1.1.5