##---------------------------------------------------------------------------##
##  File:
##      $Id$
##  Author(s):
##      Earl Hood       mhonarc@mhonarc.org
##      NIIBE Yutaka    gniibe@mri.co.jp
##      Takashi P.KATOH p-katoh@shiratori.riec.tohoku.ac.jp
##      NAKANE, Masafumi
##  Description:
##      Library defines routine to process euc-jp data.
##---------------------------------------------------------------------------##
##    Copyright (C) 1995-2003
##      Earl Hood, mhonarc@mhonarc.org
##      NIIBE Yutaka, gniibe@mri.co.jp
##      Takashi P.KATOH, p-katoh@shiratori.riec.tohoku.ac.jp
##      NAKANE, Masafumi
##
##    This program is free software; you can redistribute it and/or modify
##    it under the terms of the GNU General Public License as published by
##    the Free Software Foundation; either version 2 of the License, or
##    (at your option) any later version.
##
##    This program is distributed in the hope that it will be useful,
##    but WITHOUT ANY WARRANTY; without even the implied warranty of
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##    GNU General Public License for more details.
##
##    You should have received a copy of the GNU General Public License
##    along with this program; if not, write to the Free Software
##    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
##    02111-1307, USA
##---------------------------------------------------------------------------##

package euc_jp;

use strict;
use warnings;

## Package globals.  They stay package variables (not lexicals) so that
## code outside this file can still read them as $euc_jp::Name.
our ($Url, $UrlExp, $HUrlExp, $eucjpchar);

## Alternation of the URL schemes recognized when hyperlinking text.
$Url = '(http://|https://|ftp://|afs://|wais://|telnet://|ldap://'
     . '|gopher://|news:|nntp:|mid:|cid:|mailto:|prospero:)';

## URL pattern for plain text.
$UrlExp = $Url . q%[^\s\(\)\|<>"']*[^\.?!;,"'\|\[\]\(\)\s<>]%;

## URL pattern for text that has already been HTML-escaped: '&' is also
## excluded, so a match stops in front of an entity such as '&amp;'.
$HUrlExp = $Url . q%[^\s\(\)\|<>"'\&]*[^\.?!;,"'\|\[\]\(\)\s<>\&]%;

## Pattern source matching exactly one multibyte character at the start
## of a string (the leading '^' anchors the whole alternation).
$eucjpchar = '^(\x8f[\xa1-\xfe][\xa1-\xfe]|' .    # 3 byte EUC
             '[\xa1-\xfe][\xa1-\xfe]|\x8e[\xa1-\xdf])';

##---------------------------------------------------------------------------##
##  str2html(): Convert an euc-jp string into HTML.  Function
##  interface is similar to iso2022jp.pl functions.
##
##  Thin wrapper around eucjp_to_html() that always passes a true
##  $nourl, so URLs are never turned into hyperlinks.
##
sub str2html {
    return eucjp_to_html($_[0], 1);
}

##---------------------------------------------------------------------------##
##  _escape_html($text): Return a copy of $text with '&', '<' and '>'
##  replaced by the corresponding HTML entities.  '&' is replaced first
##  so the entities produced for '<' and '>' are not escaped again.
##
sub _escape_html {
    my ($text) = @_;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    return $text;
}

##---------------------------------------------------------------------------##
##  eucjp_to_html($body, $nourl): Convert euc-jp data into HTML.
##
##      $body   Text to convert.  It is split on LF or CRLF and the
##              lines are joined back with a single "\n"; no newline is
##              added after the last line.  An undefined $body yields ''.
##      $nourl  If true, URLs are left as plain text; otherwise every
##              match of $HUrlExp becomes an <a href="..."> hyperlink.
##
##  Returns the converted string.
##
sub eucjp_to_html {
    my ($body, $nourl) = @_;
    return '' unless defined $body;

    my @html_lines;
    foreach my $line (split(/\r?\n/, $body)) {
        my $html = '';
        while ($line ne '') {
            if ($line =~ s/^([\x00-\x7f]+)//) {
                # Run of ASCII: escape HTML meta characters first, then
                # hyperlink URLs in the already-escaped text.
                my $ascii_text = _escape_html($1);
                $ascii_text =~ s%($HUrlExp)%<a href="$1">$1</a>%gi
                    unless $nourl;
                $html .= $ascii_text;
            }
            elsif ($line =~ s%($eucjpchar)%%) {
                # One multibyte character: copied through untouched.
                $html .= $1;
            }
            else {
                # Something is wrong in the text.  Keep the remainder,
                # but escape it so a stray byte cannot be used to get
                # raw markup into the output.
                $html .= _escape_html($line);
                last;
            }
        }
        push @html_lines, $html;
    }
    return join("\n", @html_lines);
}

##---------------------------------------------------------------------------##
##  clip($str, $length, $is_html, $has_tags): Clip an euc-jp string.
##
##      $str       String to clip.
##      $length    Maximum width; an ASCII character is charged 1 and
##                 every multibyte character is charged 2.
##      $is_html   Whether $str is HTML, i.e. whether an entity such as
##                 '&amp;' should be treated as one character (the
##                 length of '&amp;' will be 1 if $is_html).
##      $has_tags  With $is_html, tags ('<' up to the next '>') are
##                 removed from the result and are charged nothing.
##
##  Returns the clipped string ('' if $str or $length is undefined).  If
##  a byte is met that is neither ASCII nor the start of a multibyte
##  character, the unprocessed remainder is appended as-is and clipping
##  stops.
##
sub clip {      # &clip($str, 10, 1, 1);
    my ($str, $length, $is_html, $has_tags) = @_;
    return '' unless defined $str and defined $length;

    my $ret = '';
    while ($length > 0 and $str ne '') {
        if ($str =~ s/^([\x00-\x7f])//) {           # ascii
            my $char = $1;
            if ($is_html and $has_tags and $char eq '<') {
                # Drop the rest of the tag; it adds nothing visible.
                $str =~ s/^[^>]*>//;
                next;
            }
            if ($is_html and $char eq '&'
                and $str =~ s/^(\#?[A-Za-z0-9]+;)//) {
                # Keep a whole entity reference together so that it is
                # never cut in the middle and is charged as 1.
                $char .= $1;
            }
            $ret .= $char;
            $length--;
        }
        elsif ($str =~ s/($eucjpchar)//) {          # non-ascii
            $ret .= $1;
            $length -= 2;
        }
        else {                                      # something is wrong
            $ret .= $str;
            last;
        }
    }
    return $ret;
}

##---------------------------------------------------------------------------##
1;