=head1 NAME

Plucene::Index::SegmentReader - the Segment reader

=head1 SYNOPSIS

my $seg_reader =
Plucene::Index::SegmentReader->new( Plucene::Index::SegmentInfo $si);
my @files = $seg_reader->files;
my @terms = $seg_reader->terms;
my $doc = $seg_reader->document($id);
my $doc_freq = $seg_reader->doc_freq($term);
my $max_doc = $seg_reader->max_doc;
my $norms = $seg_reader->norms($field, $offset);
my Plucene::Index::SegmentTermDocs $docs
= $seg_reader->term_docs($term);
my Plucene::Index::SegmentTermPositions $pos
= $seg_reader->term_positions($term);
my Plucene::Store::InputStream $stream
= $seg_reader->norm_stream($field);
if ($seg_reader->is_deleted($id)) { ... }
if ($seg_reader->has_deletions(Plucene::Index::SegmentInfo $si))
{ ... }
=head1 DESCRIPTION
The segment reader class.
my $seg_reader =
Plucene::Index::SegmentReader->new( Plucene::Index::SegmentInfo $si);
This will create a new Plucene::Index::SegmentReader object.
if ($seg_reader->has_deletions(Plucene::Index::SegmentInfo $si))
{ ... }
my @files = $seg_reader->files;
my @terms = $seg_reader->terms;
my $doc = $seg_reader->document($id);
if ($seg_reader->is_deleted($id)) { ... }
my Plucene::Index::SegmentTermDocs $docs
= $seg_reader->term_docs($term);
This will return the Plucene::Index::SegmentTermDocs object for the
given term.
my Plucene::Index::SegmentTermPositions $pos
= $seg_reader->term_positions($term);
This will return the Plucene::Index::SegmentTermPositions object for the
given term.
my $doc_freq = $seg_reader->doc_freq($term);
This returns the number of documents containing the passed term.
=cut
# Look up $term through the reader's 'tis' object and report its document
# frequency.
#
#   $term - the term to look up (passed straight to $self->{tis}->get)
#
# Returns whatever the term-info object's doc_freq method returns, or 0
# when the lookup yields a false value (i.e. the term is not known).
sub doc_freq {
    my $self = shift;
    my $term = shift;

    my $term_info = $self->{tis}->get($term);

    # An unknown term simply has a frequency of zero.
    return 0 unless $term_info;

    return $term_info->doc_freq;
}
=head2 num_docs

    my $num_docs = $seg_reader->num_docs;

This is the number of documents, excluding deleted ones.

=head2 max_doc

    my $max_doc = $seg_reader->max_doc;

=head2 norms

    my $norms = $seg_reader->norms($field, $offset);

This returns the byte-encoded normalisation factor for the passed
field. This is used by the search code to score documents.

Note that, unlike the Java implementation, we do not use the 'offset'
and 'bytes' arguments. Instead, callers should use substr to put the
result of C<norms> into the appropriate place in a string.

=head2 norm_stream

    my Plucene::Store::InputStream $stream
        = $seg_reader->norm_stream($field);

This will return the Plucene::Store::InputStream for the passed field.

=cut
# Return an input stream for the norms of $field.
#
#   $field - field name; used as the key into $self->{norms}
#
# Returns a clone of the stored norm's 'in' handle after calling
# seek(0, 0) on it. Returns nothing (undef in scalar context, the empty
# list in list context) when no norm is recorded for $field.
sub norm_stream {
    my ($self, $field) = @_;
    my $norm = $self->{norms}->{$field} or return;

    # Clone the norm's filehandle; the stored handle itself is never
    # seeked or handed out.
    my $clon = $norm->{in}->clone;
    $clon->seek(0, 0);
    return $clon;
}
# Open the norm file of every indexed field and record it in
# $self->{norms}, keyed by field name.
#
# The fields come from $self->field_infos->fields, filtered by is_indexed.
# For each one the file "<directory>/<segment>.f<field number>" is opened
# as a Plucene::Store::InputStream and wrapped in a Plucene::Index::Norm.
#
# Dies with the file name and $! if the stream constructor returns false.
sub _open_norms {
    my $self = shift;
    for my $fi (grep $_->is_indexed, $self->field_infos->fields) {
        my $file = "$self->{directory}/$self->{segment}.f" . $fi->number;
        my $fh   = Plucene::Store::InputStream->new($file)
            or die $file . " :" . $!;
        $self->{norms}{ $fi->name } = Plucene::Index::Norm->new($fh);
    }
}
package Plucene::Index::Norm;

# Minimal holder for a field's norm data.
#
#   Plucene::Index::Norm->new($in)
#
# Returns a hash-based object that stores $in under the 'in' key;
# norm_stream and _open_norms in this file pass and read an input
# stream there.
sub new {
    my ($class, $in) = @_;
    return bless { in => $in }, $class;
}

# They have bytes, too, but we're not worrying about that.

1;