PC Professionell 2004 December

home *** CD-ROM | disk | FTP | other *** search

/ PC Professionell 2004 December / PCpro_2004_12.ISO / files / webserver / tsw / TSW_3.4.0.exe / Apache2 / perl / DocSet.pm < prev next >

Wrap

Perl POD Document | 2003-04-09 | 16.9 KB | 579 lines

package DocSet::DocSet; use strict; use warnings; use DocSet::Util; use DocSet::RunTime; use DocSet::Cache (); use DocSet::Doc (); use DocSet::NavigateCache (); use File::Spec::Functions; use vars qw(@ISA); use DocSet::Config (); @ISA = qw(DocSet::Config); ######## sub new { my $class = shift; my $self = bless {}, ref($class)||$class; $self->init(@_); return $self; } sub init { my($self, $config_file, $parent_o, $src_rel_dir) = @_; $self->read_config($config_file, $parent_o); # are we inside a super docset? if ($parent_o and ref($parent_o)) { $self->{parent_o} = $parent_o; $self->merge_config($src_rel_dir); } # we assume that the docset was not modified since the last run. # if at least one source doc/config file was modified, the docset # is considered modified as well and should be rebuild. It's the # responsibility of the modified object to set its parent docset # status to 'modified'. $self->modified(0); # currently a given docset is considered to be in the 'modified' state, # if any of these conditions is true: # # 1. the included docset is 'modified': # 2. the included chapter is 'modified': # 3. the included 'copy as-is' files are 'modified': # 4. config.cfg is newer than corresponding index.html # 5. the cache file is missing } sub scan { my($self) = @_; my $src_root = $self->get_dir('src_root'); my $purge = DocSet::RunTime::get_opts('rebuild_all') ? 1 : 0; my $update = 1; # see DocSetCache::new # each output mode need its own cache, because of the destination # links which are different my $mode = $self->get('tmpl_mode'); my $cache_file = "$src_root/cache.$mode.dat"; # - create the new cache object for updates # - rebuild_all forces the existing cache's purge my $cache = DocSet::Cache->new($cache_file, $update, $purge); $self->cache($cache); # add to the docset object # a complete rebuild of the docset is done when: # - we are told to do so: # - if the cache file doesn't exist # - or the we failed to retrieve an existing cache if (DocSet::RunTime::get_opts('rebuild_all') || $cache->read_error || !$cache->can_read) { $self->modified(1); $self->rebuild(1); } # cache the index node meta data $cache->index_node(id => $self->get('id'), stitle => $self->get('stitle'), title => $self->get('title'), abstract => $self->get('abstract'), extra => $self->get('extra'), ); # croaks if the docset id is duplicated $self->check_duplicated_docset_ids(); # cache the location of the parent node cache if (my $parent_o = $self->get('parent_o')) { my $parent_src_root = $parent_o->get_dir('src_root'); (my $rel2parent_src_root = $src_root) =~ s|\Q$parent_src_root||; my $rel_dir = join '/', ("..") x ($rel2parent_src_root =~ tr|/|/|); my $parent_cache_path = "$parent_src_root/cache.$mode.dat"; $cache->parent_node($parent_cache_path, $self->get('id'), $rel_dir); $self->set_dir(rel_parent_root => $rel_dir); } else { $self->set_dir(rel_parent_root => '.'); } ### # scan the nodes of the current level and cache the meta and other # data my $hidden = 0; my @nodes_by_type = @{ $self->nodes_by_type }; while (@nodes_by_type) { my($type, $data) = splice @nodes_by_type, 0, 2; if ($type eq 'docsets') { my $docset = $self->docset_scan_n_cache($data, $hidden); $self->modified(1) if $docset->modified(); $self->object_store($docset) if defined $docset and ref $docset; } elsif ($type eq 'chapters') { my $chapter = $self->chapter_scan_n_cache($data, $hidden); if (defined $chapter and ref $chapter) { # modified chapter --> modified docset $self->modified(1); $self->object_store($chapter) } } elsif ($type eq 'links') { $self->link_scan_n_cache($data, $hidden); # we don't need to process links } elsif ($type eq 'sitemap') { $self->sitemap_cache($data, $hidden); # we don't need to process links } else { # nothing } } # the same but for the hidden objects $hidden = 1; my @hidden_nodes_by_type = @{ $self->hidden_nodes_by_type }; while (@hidden_nodes_by_type) { my($type, $data) = splice @hidden_nodes_by_type, 0, 2; if ($type eq 'docsets') { my $docset = $self->docset_scan_n_cache($data, $hidden); $self->object_store($docset) if defined $docset and ref $docset; } elsif ($type eq 'chapters') { my $chapter = $self->chapter_scan_n_cache($data, $hidden); if (defined $chapter and ref $chapter) { # modified chapter --> modified docset $self->modified(1); $self->object_store($chapter) } } else { # nothing } } $cache->node_groups($self->node_groups); # compare whether the config file is newer than the corresponding # index.html my $dst_root = $self->get_dir('dst_root'); my $config_file = $self->{config_file}; my $dst_index = "$dst_root/index.html"; my($should_update, $reason) = $self->should_update($config_file, $dst_index); $self->modified(1) if $should_update; # if @body{qw(top bot)} component files exist, check whether they # are newer than the target index.html file if (my $body = $self->get('body')) { my $src_root = $self->get_dir('src_root'); for my $sec (qw(top bot)) { my $src_file = $body->{$sec}; next unless $src_file; $src_file = catfile $src_root, $src_file; my($should_update, $reason) = $self->should_update($src_file, $dst_index); $self->modified(1) if $should_update; } } # sync the cache $cache->write; # copy non-pod files like images and stylesheets # # META: though this belongs to the 'render' part, we run it here, # since we need to know after the scan() whether the docset is # modified. a cleaner, logic-wise, solution would be only to check # modification times on files that may need to be copied as-is, # but to postpone the copying, if any, only to the render part of # the logic. We could also remove here all the files that don't # need to be copied, since they didn't change. $self->scan_copy_the_rest; } sub docset_scan_n_cache { my($self, $src_rel_dir, $hidden) = @_; my $src_root = $self->get_dir('src_root'); my $config_file = "$src_root/$src_rel_dir/config.cfg"; my $docset = $self->new($config_file, $self, $src_rel_dir); $docset->scan; # cache the child docset's meta data my $id = $docset->get('id'); $self->cache->add($id); my $meta = { stitle => $docset->get('stitle'), title => $docset->get('title'), link => "$src_rel_dir/index.html", abstract => $docset->get('abstract'), rel_path => $src_rel_dir, }; $self->cache->set($id, 'meta', $meta, $hidden); # add the location of the cache file, so later we can traverse the # nodes, by just reading the cache files, which are linked to each # other both ways. my $mode = $self->get('tmpl_mode'); my $child_cache_path = "$src_root/$src_rel_dir/cache.$mode.dat"; $self->cache->set($id, 'child_cache_path', $child_cache_path); note "\n"; # mark the end of scan return $docset; } sub link_scan_n_cache { my($self, $link, $hidden) = @_; my %meta = %$link; # make a copy my $id = delete $meta{id}; $meta{title} = $meta{stitle} unless exists $meta{title}; $meta{stitle} = $meta{title} unless exists $meta{stitle}; $self->cache->add($id); $self->cache->set($id, 'meta', \%meta, $hidden); } sub sitemap_cache { my($self, $link, $hidden) = @_; my %meta = %$link; # make a copy my $id = $meta{id}; $meta{title} = $meta{stitle} unless exists $meta{title}; $meta{stitle} = $meta{title} unless exists $meta{stitle}; $self->cache->add($id); $self->cache->set($id, 'meta', \%meta, $hidden); # we will need to raise this flag to render the doc # XXX: consider creating a Sitemap class, so we can handle this # generically as chapters and docsets $self->{sitemap} = \%meta; # see Config::sitemap method } sub chapter_scan_n_cache { my($self, $src_file, $hidden) = @_; my $id = $src_file; $self->cache->add($id); my $trg_ext = $self->trg_ext(); my $src_root = $self->get_dir('src_root'); my $dst_root = $self->get_dir('dst_root'); my $abs_doc_root = $self->get_dir('abs_doc_root'); my $src_path = "$src_root/$src_file"; my $src_ext = filename_ext($src_file) or die "cannot get an extension for $src_file [$src_path]"; my $src_mime = $self->ext2mime($src_ext) or die "unknown extension: $src_ext [$src_path]"; (my $basename = $src_file) =~ s/\.$src_ext$//; # destination paths my $rel_dst_path = "$basename.$trg_ext"; $rel_dst_path =~ s|^\./||; # strip the leading './' my $dst_path = "$dst_root/$rel_dst_path"; my $rel_doc_root = $rel_dst_path =~ m|/| ? join('/', ("..") x ($rel_dst_path =~ tr|/|/|)) : '.'; # push to the list of final chapter paths e.g. used by PS/PDF # build, which needs all the non-hidden chapters $self->trg_chapters($rel_dst_path) unless $hidden; ### to rebuild or not my($should_update, $reason) = $self->should_update($src_path, $dst_path); if (!$should_update) { note "--- $src_file: skipping ($reason)"; return undef; } ### init note "+++ $src_file: processing ($reason)"; my $dst_mime = $self->get('dst_mime'); my $conv_class = $self->conv_class($src_mime, $dst_mime); require_package($conv_class); my $chapter = $conv_class->new( docset => $self, tmpl_mode => $self->get('tmpl_mode'), tmpl_root => $self->get_dir('tmpl'), src_root => $src_root, dst_root => $dst_root, src_uri => $src_file, src_path => $src_path, dst_path => $dst_path, rel_dst_path => $rel_dst_path, rel_doc_root => $rel_doc_root, abs_doc_root => $abs_doc_root, path_from_base => $self->get_dir('path_from_base'), ); $chapter->scan(); # cache the chapter's meta and toc data $self->cache->set($id, 'meta', $chapter->meta, $hidden); $self->cache->set($id, 'toc', $chapter->toc, $hidden); return $chapter; } #################### sub scan_copy_the_rest { my($self) = @_; my @scan_copy_files = @{ $self->files_to_scan_copy() }; return unless @scan_copy_files; my %to_copy = (); my $src_root = $self->get_dir('src_root'); my $dst_root = $self->get_dir('dst_root'); note "+++ Scanning the copy as-is files. Comparing $src_root with $dst_root"; foreach my $src_path (@scan_copy_files){ my $dst_path = $src_path; # # some OSs's File::Find returns files with no dir prefix root # # (that's what ()* is for # $dst_path =~ s/(?:$src_root)*/$dst_root/; $dst_path =~ s/\Q$src_root/$dst_root/; # to rebuild or not to rebuild my($should_update, $reason) = $self->should_update($src_path, $dst_path); if (!$should_update) { note "--- skipping cp $src_path $dst_path ($reason)"; next; } $self->modified(1); # dirty state note "+++ processing $src_path => $dst_path ($reason)"; $to_copy{$src_path} = $dst_path; } $self->files_to_copy(\%to_copy); } sub render { my($self) = @_; # if the docset wasn't modified, don't render the docset return unless $self->modified(); $self->copy_the_rest; my $src_root = $self->get_dir('src_root'); # each output mode need its own cache, because of the destination # links which are different my $mode = $self->get('tmpl_mode'); my $path = "$src_root/cache.$mode.dat"; my $cache = DocSet::Cache->new($path); die "Failed to read cache from $path: " . $cache->read_error if $cache->read_error; # render the objects no matter what kind are they for my $obj ($self->stored_objects) { $obj->render($cache); } $self->complete; } #################### sub copy_the_rest { my($self) = @_; my %copy_files = %{ $self->files_to_copy }; return unless %copy_files; my $src_root = $self->get_dir('src_root'); my $dst_root = $self->get_dir('dst_root'); note "+++ Copying the non-processed files from $src_root to $dst_root"; while (my ($src_path, $dst_path) = each %copy_files) { note "+++ cp $src_path $dst_path"; copy_file($src_path, $dst_path); } } # an abstract method sub complete {} # die with the error, and supply the context in which the error has happened sub error { my $self = shift; my @context; push @context, "config file: $self->{config_file}"; die map({"!!! err: $_\n"} @_), "in context:\n", map({"\t$_\n"} @context); } sub should_update { my($self, $src_path, $dst_path) = @_; unless (-e $src_path) { $self->error("cannot find $src_path"); } # to rebuild or not to rebuild my $not_modified = (-e $dst_path and -M $dst_path < -M $src_path) ? 1 : 0; my $reason = $not_modified ? 'not modified' : 'modified'; if ($self->rebuild()) { return (1, "$reason / forced"); } else { return (!$not_modified, $reason); } } 1; __END__ =head1 NAME C<DocSet::DocSet> - An abstract docset generation class =head1 SYNOPSIS use DocSet::DocSet::HTML (); my $docset = DocSet::DocSet::HTML->new($config_file); # must start from the abs root chdir $abs_root; # must be a relative path to be able to move the generated code from # location to location, without adjusting the links $docset->set_dir(abs_root => "."); $docset->scan; $docset->render; my $should_update = $self->should_update($src_path, $dst_path); =head1 DESCRIPTION C<DocSet::DocSet> processes a docset, which can include other docsets, documents and links. In the first pass it scans the linked to it documents and other docsets and caches this information and the objects for a later peruse. In the second pass the stored objects are rendered. And the docset is completed. This class cannot be used on its own and has to be subclassed and extended, by the sub-classes which has a specific to input and output formats of the documents that need to be processed. It handles only the partial functionality which doesn't require format specific knowledge. =head2 METHODS This class inherits from C<DocSet::Config> and you will find the documentation of methods inherited from this class in its pod. The following "public" methods are implemented in this super-class: =over =item * new $class->new($config_file, $parent_o, $src_rel_dir); =item * init $self->init($config_file, $parent_o, $src_rel_dir); =item * scan $self->scan(); Scans the docset for meta data and tocs of its items and caches this information and the item objects. =item * scan_copy_the_rest $self->scan_copy_the_rest() Process the files that should be copied as is without processing (i.e. images, css files, etc). If any of the items have a timestamp newer than the corresponding copy in the target destination, the whole docset will be rebuilt. Only files that were modified will be copied during the render phase. =item * render $self->render(); Calls the render() method of each of the stored objects and creates an index page linking all the items. =item * copy_the_rest $self->copy_the_rest() Copies the files which aren't processed (i.e. images, css files, etc.) and were modified as-is. =item * should_update my $should_update = $self->should_update($src_path, $dst_path); Compare the timestamps/existance of src and dst paths and return (true, reason) if src is newer than dst otherwise return (false, reason) If rebuild_all runtime is on, this always returns (true, reason) =back =head2 ABSTRACT METHODS The following methods should be implemented by the sub-classes. =over =item * parse =item * retrieve_meta_data =item * convert =item * complete $self->complete(); put here anything that should be run after all the items have been rendered and all the meta info has been collected. i.e. generation of the I<index> file, to link to all the links and the parent node if such exists. =back =head1 AUTHORS Stas Bekman E<lt>stas (at) stason.orgE<gt> =cut