#!/usr/local/bin/perl

# the cgi-lib.pl library
#require 5.001;
use strict;
use warnings;

# Builds a smil file and any accompanying transcripts
# returns a html file linking to the smil file generated.

# Written by Mark Weal 2000

use Fcntl;
use FileHandle;
use XML::DOM;

require "./cgi-lib.pl";
require "./buildrt.cgi";

#########################################
# globals which may need to be altered
#########################################

my $xmlrootpath  = "/data/archive/churchill/peregrine/xml/";
my $filerootpath = "/data/archive/churchill/peregrine/tmp/";
my $durinput     = "01:00";    # not referenced anywhere in this file

###########################################
# Matchup, compares search to keywords.
#
#   $search   - search terms separated by whitespace
#   $keywords - keywords separated by commas
#
# Returns 1 when at least one search term is equal to one of the
# keywords, otherwise 0.  Whitespace around each keyword is ignored and
# empty terms/keywords never match.  An undefined argument yields 0.
#
# The ($$) prototype is kept so existing call sites parse as before.
###########################################
sub matchup ($$) {
    my ($search, $keywords) = @_;

    return 0 unless defined $search && defined $keywords;

    my %is_keyword;
    for my $keyword (split /,/, $keywords) {
        # "war, peace" should offer "peace", not " peace".
        $keyword =~ s/^\s+//;
        $keyword =~ s/\s+$//;
        $is_keyword{$keyword} = 1 if length $keyword;
    }

    # split ' ' skips leading whitespace and collapses runs of it, so a
    # double space in the query cannot produce an empty search term.
    for my $term (split ' ', $search) {
        return 1 if exists $is_keyword{$term};
    }

    return 0;
}

###########################################
# Creates a new, previously non-existent file "$dir$id$ext" for writing.
#
#   $dir      - directory prefix (used as-is, so include the trailing /)
#   $first_id - first numeric id to try
#   $ext      - file extension including the dot
#
# The id is incremented until the exclusive create succeeds.
# Returns ($id, $filehandle).  Dies with "can't write to user file" on
# any failure other than the file already existing.
###########################################
sub _claim_unique_file {
    my ($dir, $first_id, $ext) = @_;

    my $id = $first_id;
    while (1) {
        my $fh;
        # O_EXCL makes "does it exist?" and "create it" one atomic step.
        if (sysopen($fh, $dir . $id . $ext, O_WRONLY | O_CREAT | O_EXCL)) {
            return ($id, $fh);
        }
        die "can't write to user file" unless $!{EEXIST};
        $id++;
    }
}

###########################################
# Returns the data of the first child node of the first <$tag> element
# found below $parent, or undef when there is no such element or the
# element has no children.
###########################################
sub _first_child_text {
    my ($parent, $tag) = @_;

    my $element = $parent->getElementsByTagName($tag)->item(0) or return;
    my $child   = $element->getFirstChild()                    or return;
    return $child->getData();
}

###################################
# The Main program starts here.
###################################

# NOTE(review): many of the strings printed below are empty or consist
# only of newlines.  Presumably they once carried HTML/SMIL markup (and
# interpolated values such as the generated ids) that is missing from
# this copy of the script - confirm against the deployed version.  They
# are reproduced here exactly as found.

MAIN:
{
    my %input;    # The CGI data

    #
    # Read in all the variables set by the form
    #
    ReadParse(\%input);

    my $trans    = $input{"transcript"};
    my $duration = $input{"duration"};     # read but not used below
    my $initial  = $input{"starttime"};    # read but not used below

    # need to search the clip files to find matching clips.
    my $search = $input{"search"};

    # Search through the clip files.
    # Initially let's just use one, but eventually we will
    # search a number of different clip files.

    # open the clip file
    my $clipFile = $xmlrootpath . "mark.xml";

    # build a parser for the clip xml file.
    my $parser  = XML::DOM::Parser->new;
    my $clipdoc = $parser->parsefile($clipFile);

    # Get the first clip.
    my $clips = $clipdoc->getElementsByTagName("clips");
    if ($clips->getLength() == 0) {
        # Nothing can be built without a clip: send a valid response
        # and stop instead of calling methods on an undefined node.
        print PrintHeader();
        print "No clips available in file ";
        last MAIN;
    }
    my $clip = $clips->item(0);

    # Get the info out of the clip file.
    my $info = $clip->getElementsByTagName("info");
    if ($info->getLength() == 0) {
        print PrintHeader();
        print "No info available in transcript ";
        last MAIN;
    }
    my $filename = $info->item(0)->getElementsByTagName("filename")->item(0);
    my $author   = $info->item(0)->getElementsByTagName("author");    # not used below

    ###################################
    # create a smil file.
    ###################################
    my ($smilID, $main_smil) =
        _claim_unique_file($filerootpath, time(), ".smil");

    #####################################################
    # Write the initial html out for the results file.
    #####################################################
    print PrintHeader();
    print "\n\n\nTEST\n\n\n\n";
    print "\n\nResults Returned\n\n\n";
    print "\n\n";

    #####################################################
    # Write the initial Smil stuff to set up the results.
    #####################################################
    print {$main_smil} "\n\n";
    print {$main_smil} "\n";
    print {$main_smil} "";
    print {$main_smil} "\n";
    print {$main_smil} "\n";
    print {$main_smil} "\n";
    print {$main_smil} "\n\n\n";

    #######################################################
    # Search the clips in the clip xml file.
    #######################################################

    # Extract the speaker information
    my $segments    = $clip->getElementsByTagName("segments")->item(0);
    my $segmentlist = $segments ? $segments->getElementsByTagName("segment") : undef;
    my $numSegments = $segmentlist ? $segmentlist->getLength() : 0;
    my $matches     = 0;

    # print "Checking " . $numSegments . " segments \n" ;

    for my $seg (0 .. $numSegments - 1) {

        # Extract the data from the XML.
        my $segment     = $segmentlist->item($seg);
        my $startpos    = _first_child_text($segment, "start-speech");
        my $endpos      = _first_child_text($segment, "end-speech");
        my $keywordtext = _first_child_text($segment, "keywords");

        # A segment lacking any of the three values cannot be matched or
        # clipped; skip it rather than abort a half-written page.
        next
            unless defined $startpos
            && defined $endpos
            && defined $keywordtext;

        # See if the keywords match the search.
        # print "Segment =" . $seg . " startspeech = " . $startpos . " endspeech = " . $endpos . " \n" ;
        # print "search =" . $search . " \n" ;
        # print "keywords =" . $keywordtext . " \n" ;
        next unless matchup($search, $keywordtext) == 1;

        ###################################
        # create a clip smil file.
        ###################################
        my ($clipSmilID, $clip_smil) =
            _claim_unique_file($filerootpath, time(), ".smil");

        #####################################################
        # Write the initial Smil stuff to set up the results.
        #####################################################
        print {$clip_smil} "\n\n";
        print {$clip_smil} "\n";
        print {$clip_smil} "";
        print {$clip_smil} "\n";
        print {$clip_smil} "\n";
        print {$clip_smil} "\n";
        print {$clip_smil} "\n\n\n";
        print {$clip_smil} "\n";

        # let's build the rt file now.
        $matches = $matches + 1;

        print {$main_smil} "\n";

        # Pick an .rt name that is not taken yet.  The file itself is
        # not created here; the name is handed to buildRtFile below.
        my $rtID = $smilID;
        $rtID++ while -e $filerootpath . $rtID . ".rt";
        my $rtFileName = $filerootpath . $rtID . ".rt";

        # Try our new subroutine.
        my ($clipbegin, $clipend) =
            buildRtFile($trans, $startpos, $endpos, $rtFileName);
        my $clip_duration = timesubtract($clipend, $clipbegin);    # not used below

        # Write the entries into the clip smil file.
        print {$clip_smil} "\n";
        print {$clip_smil} "\n\n";
        close($clip_smil)
            or warn "error closing clip smil file: $!";

        # close the smil file par entry.
        print {$main_smil} "\n";

        # write the clip entry into the returned HTML file.
        my $cliptext =
            getSegmentText($filename->getFirstChild()->getData(), $startpos);
        $cliptext = "" unless defined $cliptext;

        # Show at most 200 characters, then drop the last (possibly
        # cut-off) word.
        my $trunctext = substr($cliptext, 0, 200);
        $trunctext =~ s/ [^ ]+$//;

        print "$trunctext" . "...\n\n\n";
        print "";
        print "view clip.\n";
        print "\n\n\n";
    }

    ##############################################
    # Close off the main SMIL file.
    ##############################################
    print {$main_smil} "\n\n";
    close($main_smil)
        or warn "error closing main smil file: $!";

    ##############################################
    # Close off the main HTML file.
    ##############################################
    print "";
    print "The combined results\n";
    print "\n\n\n";
    print "\n";
}