[Clam-devel] adding frames to a segment

Giacomo Sommavilla azeem at libero.it
Wed Jun 20 08:11:18 PDT 2007


hi greg!

Here is a minimal program we use to analyze short speech audio chunks (we are developing a CLAM-based module for a TTS system) and store each of them in an SDIF file.
I have also attached a standard XML analysis config file.

I hope they are useful :-)

giacomo.




> Hi all,
> 
> I've been trying to get CLAM to load an audio file, analyze it, and write
> the results to an SDIF file.  So far I have CLAM reading audio into a
> segment, but when I give this segment to the class SpectralAnalysis
> everything goes down with a null pointer exception because the segment
> doesn't yet have any frames.  I'm not sure what I need to do here.  I could
> create a new Frame object and add it to the Segment's frame list, but there
> must be a class that is responsible for chopping the audio data into
> frames.  So, I really shouldn't need to do this.  The question is what class
> does this?  After one loads audio data into a Segment what does one have to
> do to create the list of frames before passing the Segment on to the
> analysis routine?
> 
> I've included an example file below that illustrates what I'm talking
> about.  The null pointer exception comes at the line
>          while (aSpectralAnalysis.Do( aSegment )) {}
> And it's thrown inside the SpectralAnalysis object when that object tries to
> access a Frame object that doesn't exist.
> 
> Best,
> Greg
> 
> 
> #include "SimpleOscillator.hxx"
> #include "SMSAnalysisCore.hxx"
> #include "SMSAnalysisConfig.hxx"
> #include "SMSSynthesis.hxx"
> #include "Audio.hxx"
> #include "Frame.hxx"
> #include "Segment.hxx"
> #include "AudioOutPort.hxx"
> #include "MonoAudioFileReader.hxx"
> #include "AudioDatabaseReader.hxx"
> #include "Network.hxx"
> #include "SpectralAnalysis.hxx"
> #include "SpectralPeakDetect.hxx"
> #include "Spectrum.hxx"
> #include "SpectralPeakArray.hxx"
> #include "SDIFOut.hxx"
> #include "SpectralAnalysis.hxx"
> #include "SpectralAnalysisConfig.hxx"
> #include "AudioFileIn.hxx"
> #include "AudioFileConfig.hxx"
> 
> // Absolute path of the WAV file that is loaded and analyzed.
> #define AUDIO_IN_FILE "/Users/greg/Music/samples/MyPatches/EBowGuitarWav/EBow_Guitar_A4_RS.wav"
> // Absolute path of the SDIF file the analysis frames are written to.
> #define SDIF_OUT_FILE "/Users/greg/Music/samples/MyPatches/EBowGuitarSDIF/EBow_Guitar_A4_RS_cmd.sdif"
> 
> class SDIFWriterApp;
> 
> /**
>  * Small test harness: loads a WAV file into a CLAM::Segment, runs
>  * CLAM::SpectralAnalysis over the segment and writes every frame of the
>  * segment to an SDIF file through CLAM::SDIFOut.
>  */
> class SDIFWriterApp
> {
> public:
>     /** Empty set-up hook; does nothing. */
>     void setUp()
>     {
>     }
> 
>     CLAM::SMSSynthesis mSynthesis;
> 
>     /** Value handed to SetResWindowSize() / SetAnalWindowSize(). */
>     const int helperResAnalWindowSize() { return 1025; }
> //    const int helperAnalWindowSize() { return 2049; }
>     /** Value handed to SetSinWindowSize(). */
>     const int helperAnalWindowSize() { return 1025; }
>     /** Hop size shared by the analysis and synthesis configurations. */
>     const int helperAnalHopSize() {return 512;}
> 
> 
>     //TODO: fix. it runs but produces a broken sinusoidal
>     /**
>      * Loads AUDIO_IN_FILE, analyzes it and writes the result to SDIF_OUT_FILE.
>      * Returns early (after printing a message) when the audio cannot be loaded.
>      */
>     void testAnalysisSynthesis() //no segment, no frame just streaming inner data
>     {
> //        CLAM::ErrAssertionFailed::breakpointInCLAMAssertEnabled = true;
> 
>         const unsigned int buffersize = 1024;
>         const int samplerate = 44100;
>         const int frameSize = 2048;
> 
>         CLAM::Segment aSegment = CLAM::Segment();
>         std::string filename = std::string(AUDIO_IN_FILE);
>         // Do not analyze a segment whose audio could not be read.
>         if (!LoadSound(filename, aSegment))
>         {
>             std::cout << "Could not load audio from " << filename << std::endl;
>             return;
>         }
> 
>         // NOTE(review): the frame list is replaced by an empty one here, so the
>         // segment holds no frames when it reaches SpectralAnalysis::Do() below.
>         // TODO confirm which CLAM class is expected to add the frames.
>         aSegment.SetFramesArray(CLAM::List<CLAM::Frame>());
>         aSegment.SetChildren(CLAM::List<CLAM::Segment>());
>         aSegment.mCurrentFrameIndex=0;
> 
>         CLAM::SpectralAnalysisConfig aSpectralAnalysisConfig;
>         aSpectralAnalysisConfig.SetprHopSize( 256 );
>         aSpectralAnalysisConfig.SetprZeroPadding( 2 );
>         aSpectralAnalysisConfig.SetprFFTSize( 2048 );
> 
>         std::cout << "About to request audio from the segment." << std::endl;
>         // Bind by reference: only the duration is needed, no copy of the samples.
>         CLAM::Audio& theAudio = aSegment.GetAudio();
>         std::cout << "Retrieved " << theAudio.GetDuration() << " milliseconds of audio from the segment." << std::endl;
> 
>         CLAM::SpectralAnalysis aSpectralAnalysis( aSpectralAnalysisConfig );
>         aSpectralAnalysis.Start();
>         while (aSpectralAnalysis.Do( aSegment )) {}
>         aSpectralAnalysis.Stop();
> 
>         CLAM::SDIFOutConfig theSDIFOutConfig;
>         theSDIFOutConfig.SetEnableFundFreq(false);
>         theSDIFOutConfig.SetEnablePeakArray(true);
>         theSDIFOutConfig.SetEnableResidual(true);
>         theSDIFOutConfig.SetFileName( SDIF_OUT_FILE );
>         theSDIFOutConfig.SetFrameSize(frameSize);
>         theSDIFOutConfig.SetSamplingRate(samplerate);
>         theSDIFOutConfig.SetSpectrumSize(buffersize);
>         CLAM::SDIFOut theSDIFOut(theSDIFOutConfig);
> 
>         // Write every frame currently stored in the segment.
>         theSDIFOut.Start();
>         const int nFrames = aSegment.GetnFrames();
>         for( int i=0; i<nFrames; i++ )
>         {
>             theSDIFOut.Do( aSegment.GetFrame( i ) );
>         }
>         theSDIFOut.Stop(  );
>         //myMonoAudioFileReader.Stop();
>     }
> 
>     // helper methods for the network tests
>     /**
>      * Returns the shared (function-static) SMS analysis configuration.
>      * The window sizes, hop size and zero padding are re-applied on every call.
>      */
>     const CLAM::SMSAnalysisConfig& helperAnalysisConfigInstance()
>     {
>         // Same hop size the synthesis configuration uses.
>         const int analHopSize = helperAnalHopSize();
> //        analHopSize= (resAnalWindowSize-1)/2 ;
> 
> //        int synthFrameSize = analHopSize;
>         const int analZeroPaddingFactor= 2;
> 
>         // SMS Analysis configuration
>         static CLAM::SMSAnalysisConfig analConfig;
> 
>         analConfig.SetSinWindowSize(helperAnalWindowSize() );
>         analConfig.SetHopSize(analHopSize);
> //        analConfig.SetSinWindowType(mGlobalConfig.GetAnalysisWindowType());
>         analConfig.SetSinZeroPadding(analZeroPaddingFactor);
>         analConfig.SetResWindowSize( helperResAnalWindowSize() );
> //        analConfig.SetResWindowType(mGlobalConfig.GetResAnalysisWindowType());
> 
> //        analConfig.GetPeakDetect().SetMagThreshold(mGlobalConfig.GetAnalysisPeakDetectMagThreshold());
> //        analConfig.GetPeakDetect().SetMaxFreq(mGlobalConfig.GetAnalysisPeakDetectMaxFreq());
> //        analConfig.GetSinTracking().SetIsHarmonic(mGlobalConfig.GetAnalysisHarmonic());
> //        analConfig.GetFundFreqDetect().SetReferenceFundFreq(mGlobalConfig.GetAnalysisReferenceFundFreq());
> //        analConfig.GetFundFreqDetect().SetLowestFundFreq(mGlobalConfig.GetAnalysisLowestFundFreq());
> //        analConfig.GetFundFreqDetect().SetHighestFundFreq(mGlobalConfig.GetAnalysisHighestFundFreq());
> 
>         return analConfig;
>     }
> 
>     /**
>      * Returns the shared (function-static) SMS synthesis configuration.
>      * Frame size and hop size are both set to helperAnalHopSize().
>      */
>     const CLAM::SMSSynthesisConfig & helperSynthesisConfigInstance()
>     {
>         static CLAM::SMSSynthesisConfig synthConfig;
>         const int synthFrameSize = helperAnalHopSize();
>         synthConfig.SetAnalWindowSize( helperResAnalWindowSize() );
>         synthConfig.SetFrameSize(synthFrameSize);
>         synthConfig.SetHopSize(synthFrameSize);
>         return synthConfig;
>     }
> 
>     /**
>      * Reads the WAV file `filename` into the audio of `segment` and sets the
>      * segment's end time, sampling rate and current frame index.
>      *
>      * @param filename path of the WAV file to read
>      * @param segment  segment that receives the audio
>      * @return false when the file reader cannot be configured, when the file
>      *         reports a non-positive sample rate, or when reading fails;
>      *         true otherwise
>      */
>     bool LoadSound(const std::string& filename, CLAM::Segment& segment)
>     {
>         CLAM::AudioFileIn myAudioFileIn;
>         CLAM::AudioFileConfig infilecfg;
>         infilecfg.SetFilename(filename);
>         infilecfg.SetFiletype(CLAM::EAudioFileType::eWave);
>         if(!myAudioFileIn.Configure(infilecfg))
>         {
>             std::cout << "Configuration of audio file failed. Does file exist?" << std::endl;
>             return false;
>         }
> 
>         /////////////////////////////////////////////////////////////////////////////
>         // Initialization of the processing data objects :
>         const CLAM::TSize fileSize=myAudioFileIn.Size();
> 
>         const int samplingRate = int(myAudioFileIn.SampleRate());
>         // A non-positive rate would make the duration division below invalid.
>         if (samplingRate <= 0)
>         {
>             std::cout << "Audio file reports an invalid sample rate." << std::endl;
>             return false;
>         }
> 
>         // Spectral Segment that will actually hold data
>         // Divide in floating point; an integer division would drop the
>         // fractional part of the duration.
>         // NOTE(review): this yields seconds - confirm the unit SetEndTime() expects.
>         const float duration = static_cast<float>(fileSize) / static_cast<float>(samplingRate);
>         segment.SetEndTime(duration);
>         segment.SetSamplingRate(samplingRate);
>         segment.mCurrentFrameIndex=0;
>         segment.GetAudio().SetSize(fileSize);
>         segment.GetAudio().SetSampleRate(samplingRate);
> 
> 
>         //Read Audio File
>         myAudioFileIn.Start();
>         const bool response = myAudioFileIn.Do(segment.GetAudio());
>         myAudioFileIn.Stop();
>         // Report the outcome of the read instead of unconditionally succeeding.
>         return response;
>     }
> 
> };
> 
> /**
>  * Program entry point: runs the analysis test once and reports completion.
>  * The command-line arguments are not used.
>  */
> int main(int argc,char** argv)
> {
>     // Exception handling is currently disabled; the previous handlers were:
>     //     catch(Err error)        { error.Print();
>     //                               std::cerr << "Abnormal Program Termination" << std::endl;
>     //                               return -1; }
>     //     catch (std::exception e) { std::cout << e.what() << std::endl;
>     //                               return -1; }
>     SDIFWriterApp writer;
>     writer.testAnalysisSynthesis();
> 
>     std::cout << "all done" << std::endl;
>     return 0;
> }
> 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: analysis.xml
Type: text/xml
Size: 2399 bytes
Desc: not available
URL: <http://lists.clam-project.org/pipermail/clam-devel-clam-project.org/attachments/20070620/5d78ec98/attachment.xml>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: SMSAnalyze.cxx
Type: text/x-c++src
Size: 2467 bytes
Desc: not available
URL: <http://lists.clam-project.org/pipermail/clam-devel-clam-project.org/attachments/20070620/5d78ec98/attachment-0008.cxx>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: SMSIo.cxx
Type: text/x-c++src
Size: 2319 bytes
Desc: not available
URL: <http://lists.clam-project.org/pipermail/clam-devel-clam-project.org/attachments/20070620/5d78ec98/attachment-0009.cxx>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: SMSIo.hxx
Type: text/x-c++hdr
Size: 452 bytes
Desc: not available
URL: <http://lists.clam-project.org/pipermail/clam-devel-clam-project.org/attachments/20070620/5d78ec98/attachment-0004.hxx>


More information about the clam-devel mailing list