home *** CD-ROM | disk | FTP | other *** search
- package DocSet::DocSet;
-
- use strict;
- use warnings;
-
- use DocSet::Util;
- use DocSet::RunTime;
- use DocSet::Cache ();
- use DocSet::Doc ();
- use DocSet::NavigateCache ();
-
- use File::Spec::Functions;
-
- use vars qw(@ISA);
- use DocSet::Config ();
- @ISA = qw(DocSet::Config);
-
- ########
- sub new {
- my $class = shift;
- my $self = bless {}, ref($class)||$class;
- $self->init(@_);
- return $self;
- }
-
- sub init {
- my($self, $config_file, $parent_o, $src_rel_dir) = @_;
-
- $self->read_config($config_file, $parent_o);
-
- # are we inside a super docset?
- if ($parent_o and ref($parent_o)) {
- $self->{parent_o} = $parent_o;
- $self->merge_config($src_rel_dir);
- }
-
- # we assume that the docset was not modified since the last run.
- # if at least one source doc/config file was modified, the docset
- # is considered modified as well and should be rebuild. It's the
- # responsibility of the modified object to set its parent docset
- # status to 'modified'.
- $self->modified(0);
-
- # currently a given docset is considered to be in the 'modified' state,
- # if any of these conditions is true:
- #
- # 1. the included docset is 'modified':
- # 2. the included chapter is 'modified':
- # 3. the included 'copy as-is' files are 'modified':
- # 4. config.cfg is newer than corresponding index.html
- # 5. the cache file is missing
-
- }
-
- sub scan {
- my($self) = @_;
-
- my $src_root = $self->get_dir('src_root');
- my $purge = DocSet::RunTime::get_opts('rebuild_all') ? 1 : 0;
- my $update = 1; # see DocSetCache::new
- # each output mode need its own cache, because of the destination
- # links which are different
- my $mode = $self->get('tmpl_mode');
- my $cache_file = "$src_root/cache.$mode.dat";
-
- # - create the new cache object for updates
- # - rebuild_all forces the existing cache's purge
- my $cache = DocSet::Cache->new($cache_file, $update, $purge);
- $self->cache($cache); # add to the docset object
-
- # a complete rebuild of the docset is done when:
- # - we are told to do so:
- # - if the cache file doesn't exist
- # - or the we failed to retrieve an existing cache
- if (DocSet::RunTime::get_opts('rebuild_all') ||
- $cache->read_error || !$cache->can_read) {
- $self->modified(1);
- $self->rebuild(1);
- }
-
- # cache the index node meta data
- $cache->index_node(id => $self->get('id'),
- stitle => $self->get('stitle'),
- title => $self->get('title'),
- abstract => $self->get('abstract'),
- extra => $self->get('extra'),
- );
-
- # croaks if the docset id is duplicated
- $self->check_duplicated_docset_ids();
-
- # cache the location of the parent node cache
- if (my $parent_o = $self->get('parent_o')) {
- my $parent_src_root = $parent_o->get_dir('src_root');
- (my $rel2parent_src_root = $src_root) =~ s|\Q$parent_src_root||;
- my $rel_dir = join '/', ("..") x ($rel2parent_src_root =~ tr|/|/|);
- my $parent_cache_path = "$parent_src_root/cache.$mode.dat";
- $cache->parent_node($parent_cache_path,
- $self->get('id'),
- $rel_dir);
- $self->set_dir(rel_parent_root => $rel_dir);
- }
- else {
- $self->set_dir(rel_parent_root => '.');
- }
-
- ###
- # scan the nodes of the current level and cache the meta and other
- # data
-
- my $hidden = 0;
- my @nodes_by_type = @{ $self->nodes_by_type };
- while (@nodes_by_type) {
- my($type, $data) = splice @nodes_by_type, 0, 2;
- if ($type eq 'docsets') {
- my $docset = $self->docset_scan_n_cache($data, $hidden);
- $self->modified(1) if $docset->modified();
- $self->object_store($docset)
- if defined $docset and ref $docset;
-
- } elsif ($type eq 'chapters') {
- my $chapter = $self->chapter_scan_n_cache($data, $hidden);
- if (defined $chapter and ref $chapter) {
- # modified chapter --> modified docset
- $self->modified(1);
- $self->object_store($chapter)
- }
- } elsif ($type eq 'links') {
- $self->link_scan_n_cache($data, $hidden);
- # we don't need to process links
- } elsif ($type eq 'sitemap') {
- $self->sitemap_cache($data, $hidden);
- # we don't need to process links
- } else {
- # nothing
- }
-
- }
-
- # the same but for the hidden objects
- $hidden = 1;
- my @hidden_nodes_by_type = @{ $self->hidden_nodes_by_type };
- while (@hidden_nodes_by_type) {
- my($type, $data) = splice @hidden_nodes_by_type, 0, 2;
- if ($type eq 'docsets') {
- my $docset = $self->docset_scan_n_cache($data, $hidden);
- $self->object_store($docset)
- if defined $docset and ref $docset;
-
- } elsif ($type eq 'chapters') {
- my $chapter = $self->chapter_scan_n_cache($data, $hidden);
- if (defined $chapter and ref $chapter) {
- # modified chapter --> modified docset
- $self->modified(1);
- $self->object_store($chapter)
- }
-
- } else {
- # nothing
- }
- }
-
- $cache->node_groups($self->node_groups);
-
- # compare whether the config file is newer than the corresponding
- # index.html
- my $dst_root = $self->get_dir('dst_root');
- my $config_file = $self->{config_file};
-
- my $dst_index = "$dst_root/index.html";
- my($should_update, $reason) =
- $self->should_update($config_file, $dst_index);
- $self->modified(1) if $should_update;
-
- # if @body{qw(top bot)} component files exist, check whether they
- # are newer than the target index.html file
- if (my $body = $self->get('body')) {
- my $src_root = $self->get_dir('src_root');
- for my $sec (qw(top bot)) {
- my $src_file = $body->{$sec};
- next unless $src_file;
- $src_file = catfile $src_root, $src_file;
- my($should_update, $reason) =
- $self->should_update($src_file, $dst_index);
- $self->modified(1) if $should_update;
- }
- }
-
- # sync the cache
- $cache->write;
-
- # copy non-pod files like images and stylesheets
- #
- # META: though this belongs to the 'render' part, we run it here,
- # since we need to know after the scan() whether the docset is
- # modified. a cleaner, logic-wise, solution would be only to check
- # modification times on files that may need to be copied as-is,
- # but to postpone the copying, if any, only to the render part of
- # the logic. We could also remove here all the files that don't
- # need to be copied, since they didn't change.
- $self->scan_copy_the_rest;
-
- }
-
-
- sub docset_scan_n_cache {
- my($self, $src_rel_dir, $hidden) = @_;
-
- my $src_root = $self->get_dir('src_root');
- my $config_file = "$src_root/$src_rel_dir/config.cfg";
- my $docset = $self->new($config_file, $self, $src_rel_dir);
- $docset->scan;
-
- # cache the child docset's meta data
- my $id = $docset->get('id');
- $self->cache->add($id);
- my $meta = {
- stitle => $docset->get('stitle'),
- title => $docset->get('title'),
- link => "$src_rel_dir/index.html",
- abstract => $docset->get('abstract'),
- rel_path => $src_rel_dir,
- };
- $self->cache->set($id, 'meta', $meta, $hidden);
-
- # add the location of the cache file, so later we can traverse the
- # nodes, by just reading the cache files, which are linked to each
- # other both ways.
- my $mode = $self->get('tmpl_mode');
- my $child_cache_path = "$src_root/$src_rel_dir/cache.$mode.dat";
- $self->cache->set($id, 'child_cache_path', $child_cache_path);
-
- note "\n"; # mark the end of scan
-
- return $docset;
- }
-
-
-
- sub link_scan_n_cache {
- my($self, $link, $hidden) = @_;
- my %meta = %$link; # make a copy
- my $id = delete $meta{id};
- $meta{title} = $meta{stitle} unless exists $meta{title};
- $meta{stitle} = $meta{title} unless exists $meta{stitle};
- $self->cache->add($id);
- $self->cache->set($id, 'meta', \%meta, $hidden);
- }
-
- sub sitemap_cache {
- my($self, $link, $hidden) = @_;
- my %meta = %$link; # make a copy
- my $id = $meta{id};
- $meta{title} = $meta{stitle} unless exists $meta{title};
- $meta{stitle} = $meta{title} unless exists $meta{stitle};
- $self->cache->add($id);
- $self->cache->set($id, 'meta', \%meta, $hidden);
-
- # we will need to raise this flag to render the doc
- # XXX: consider creating a Sitemap class, so we can handle this
- # generically as chapters and docsets
- $self->{sitemap} = \%meta;
- # see Config::sitemap method
- }
-
- sub chapter_scan_n_cache {
- my($self, $src_file, $hidden) = @_;
-
- my $id = $src_file;
- $self->cache->add($id);
-
- my $trg_ext = $self->trg_ext();
-
- my $src_root = $self->get_dir('src_root');
- my $dst_root = $self->get_dir('dst_root');
- my $abs_doc_root = $self->get_dir('abs_doc_root');
- my $src_path = "$src_root/$src_file";
-
- my $src_ext = filename_ext($src_file)
- or die "cannot get an extension for $src_file [$src_path]";
- my $src_mime = $self->ext2mime($src_ext)
- or die "unknown extension: $src_ext [$src_path]";
- (my $basename = $src_file) =~ s/\.$src_ext$//;
-
- # destination paths
- my $rel_dst_path = "$basename.$trg_ext";
- $rel_dst_path =~ s|^\./||; # strip the leading './'
- my $dst_path = "$dst_root/$rel_dst_path";
-
- my $rel_doc_root = $rel_dst_path =~ m|/|
- ? join('/', ("..") x ($rel_dst_path =~ tr|/|/|))
- : '.';
-
- # push to the list of final chapter paths e.g. used by PS/PDF
- # build, which needs all the non-hidden chapters
- $self->trg_chapters($rel_dst_path) unless $hidden;
-
- ### to rebuild or not
- my($should_update, $reason) = $self->should_update($src_path, $dst_path);
- if (!$should_update) {
- note "--- $src_file: skipping ($reason)";
- return undef;
- }
-
- ### init
- note "+++ $src_file: processing ($reason)";
- my $dst_mime = $self->get('dst_mime');
- my $conv_class = $self->conv_class($src_mime, $dst_mime);
- require_package($conv_class);
-
- my $chapter = $conv_class->new(
- docset => $self,
- tmpl_mode => $self->get('tmpl_mode'),
- tmpl_root => $self->get_dir('tmpl'),
- src_root => $src_root,
- dst_root => $dst_root,
- src_uri => $src_file,
- src_path => $src_path,
- dst_path => $dst_path,
- rel_dst_path => $rel_dst_path,
- rel_doc_root => $rel_doc_root,
- abs_doc_root => $abs_doc_root,
- path_from_base => $self->get_dir('path_from_base'),
- );
-
- $chapter->scan();
-
- # cache the chapter's meta and toc data
- $self->cache->set($id, 'meta', $chapter->meta, $hidden);
- $self->cache->set($id, 'toc', $chapter->toc, $hidden);
-
- return $chapter;
-
- }
-
- ####################
- sub scan_copy_the_rest {
- my($self) = @_;
-
- my @scan_copy_files = @{ $self->files_to_scan_copy() };
-
- return unless @scan_copy_files;
-
- my %to_copy = ();
-
- my $src_root = $self->get_dir('src_root');
- my $dst_root = $self->get_dir('dst_root');
- note "+++ Scanning the copy as-is files. Comparing $src_root with $dst_root";
- foreach my $src_path (@scan_copy_files){
- my $dst_path = $src_path;
- # # some OSs's File::Find returns files with no dir prefix root
- # # (that's what ()* is for
- # $dst_path =~ s/(?:$src_root)*/$dst_root/;
- $dst_path =~ s/\Q$src_root/$dst_root/;
-
- # to rebuild or not to rebuild
- my($should_update, $reason) =
- $self->should_update($src_path, $dst_path);
- if (!$should_update) {
- note "--- skipping cp $src_path $dst_path ($reason)";
- next;
- }
- $self->modified(1); # dirty state
- note "+++ processing $src_path => $dst_path ($reason)";
- $to_copy{$src_path} = $dst_path;
- }
-
- $self->files_to_copy(\%to_copy);
- }
-
- sub render {
- my($self) = @_;
-
- # if the docset wasn't modified, don't render the docset
- return unless $self->modified();
-
- $self->copy_the_rest;
-
- my $src_root = $self->get_dir('src_root');
-
- # each output mode need its own cache, because of the destination
- # links which are different
- my $mode = $self->get('tmpl_mode');
- my $path = "$src_root/cache.$mode.dat";
- my $cache = DocSet::Cache->new($path);
-
- die "Failed to read cache from $path: " . $cache->read_error
- if $cache->read_error;
-
- # render the objects no matter what kind are they
- for my $obj ($self->stored_objects) {
- $obj->render($cache);
- }
-
- $self->complete;
-
- }
-
- ####################
- sub copy_the_rest {
- my($self) = @_;
-
- my %copy_files = %{ $self->files_to_copy };
-
- return unless %copy_files;
-
- my $src_root = $self->get_dir('src_root');
- my $dst_root = $self->get_dir('dst_root');
- note "+++ Copying the non-processed files from $src_root to $dst_root";
- while (my ($src_path, $dst_path) = each %copy_files) {
- note "+++ cp $src_path $dst_path";
- copy_file($src_path, $dst_path);
- }
- }
-
-
- # an abstract method
- sub complete {}
-
- # die with the error, and supply the context in which the error has happened
- sub error {
- my $self = shift;
-
- my @context;
- push @context, "config file: $self->{config_file}";
-
- die map({"!!! err: $_\n"} @_),
- "in context:\n", map({"\t$_\n"} @context);
-
- }
-
- sub should_update {
- my($self, $src_path, $dst_path) = @_;
-
- unless (-e $src_path) {
- $self->error("cannot find $src_path");
- }
-
- # to rebuild or not to rebuild
- my $not_modified =
- (-e $dst_path and -M $dst_path < -M $src_path) ? 1 : 0;
-
- my $reason = $not_modified ? 'not modified' : 'modified';
- if ($self->rebuild()) {
- return (1, "$reason / forced");
- }
- else {
- return (!$not_modified, $reason);
- }
-
- }
-
- 1;
- __END__
-
- =head1 NAME
-
- C<DocSet::DocSet> - An abstract docset generation class
-
- =head1 SYNOPSIS
-
- use DocSet::DocSet::HTML ();
- my $docset = DocSet::DocSet::HTML->new($config_file);
-
- # must start from the abs root
- chdir $abs_root;
-
- # must be a relative path to be able to move the generated code from
- # location to location, without adjusting the links
- $docset->set_dir(abs_root => ".");
- $docset->scan;
- $docset->render;
-
- my $should_update = $self->should_update($src_path, $dst_path);
-
- =head1 DESCRIPTION
-
- C<DocSet::DocSet> processes a docset, which can include other docsets,
- documents and links. In the first pass it scans the linked to it
- documents and other docsets and caches this information and the
- objects for a later peruse. In the second pass the stored objects are
- rendered. And the docset is completed.
-
- This class cannot be used on its own and has to be subclassed and
- extended, by the sub-classes which has a specific to input and output
- formats of the documents that need to be processed. It handles only
- the partial functionality which doesn't require format specific
- knowledge.
-
- =head2 METHODS
-
- This class inherits from C<DocSet::Config> and you will find the
- documentation of methods inherited from this class in its pod.
-
- The following "public" methods are implemented in this super-class:
-
- =over
-
- =item * new
-
- $class->new($config_file, $parent_o, $src_rel_dir);
-
- =item * init
-
- $self->init($config_file, $parent_o, $src_rel_dir);
-
- =item * scan
-
- $self->scan();
-
- Scans the docset for meta data and tocs of its items and caches this
- information and the item objects.
-
- =item * scan_copy_the_rest
-
- $self->scan_copy_the_rest()
-
- Process the files that should be copied as is without processing
- (i.e. images, css files, etc). If any of the items have a timestamp
- newer than the corresponding copy in the target destination, the whole
- docset will be rebuilt.
-
- Only files that were modified will be copied during the render phase.
-
- =item * render
-
- $self->render();
-
- Calls the render() method of each of the stored objects and creates an
- index page linking all the items.
-
- =item * copy_the_rest
-
- $self->copy_the_rest()
-
- Copies the files which aren't processed (i.e. images, css files, etc.)
- and were modified as-is.
-
- =item * should_update
-
- my $should_update = $self->should_update($src_path, $dst_path);
-
- Compare the timestamps/existance of src and dst paths and return
- (true, reason) if src is newer than dst otherwise return (false,
- reason)
-
- If rebuild_all runtime is on, this always returns (true, reason)
-
- =back
-
- =head2 ABSTRACT METHODS
-
- The following methods should be implemented by the sub-classes.
-
- =over
-
- =item * parse
-
- =item * retrieve_meta_data
-
- =item * convert
-
- =item * complete
-
- $self->complete();
-
- put here anything that should be run after all the items have been
- rendered and all the meta info has been collected. i.e. generation of
- the I<index> file, to link to all the links and the parent node if
- such exists.
-
- =back
-
- =head1 AUTHORS
-
- Stas Bekman E<lt>stas (at) stason.orgE<gt>
-
- =cut
-