//
//  AudioExtractor.m
//  AVFAudioExtractor
//
//  Created by hasloe on 14/01/2021.
//  Copyright © 2021 MPI. All rights reserved.
//

#import "AudioExtractor.h"
#import <Foundation/Foundation.h>
#import <CoreFoundation/CFDictionary.h>

// File-wide debug switch. When TRUE, the methods in this file emit additional
// NSLog diagnostics. It is changed through +setDebugMode: and queried through
// +isDebugMode.
static BOOL AVFDebug = FALSE;

/*
 * A class that extracts the (first) audio track from a video (or an audio) file
 * and returns decoded audio samples for a specified time interval to the caller,
 * usually for the purpose of visualizing a waveform or similar. If this works,
 * extracting a wav file from a video is no longer necessary in order to see the
 * waveform.
 */
@implementation AudioExtractor

@synthesize assetReader;
@synthesize mediaAsset;
@synthesize audioTrack;
@synthesize trackOutput;
@synthesize javaRef;
@synthesize lastError;
@synthesize hasAudio;
@synthesize sampleFreq;
@synthesize sampleDurationMs;
@synthesize sampleDurationSeconds;
@synthesize mediaDurationMs;
@synthesize mediaDurationSeconds;
@synthesize numberOfChannels;
@synthesize bitsPerSample;
@synthesize bytesPerSampleBuffer;
@synthesize durationSecPerSampleBuffer;

/*
 * Initializes an AVURLAsset for the url and creates an AVAssetReader for it.
 * If the asset has at least one audio track, an AVAssetReaderTrackOutput for
 * the first audio track is added to the reader; that output is later used to
 * set the requested range and to read sample buffers in a loop, copying the
 * bytes to a larger buffer provided by the caller.
 * The output format has to be Linear PCM; the number of channels and the
 * bits-per-sample are passed as output settings. An AVAudioPlayer is created
 * only for retrieving the number of channels of the audio track (this
 * property doesn't seem to be available from the AVAssetTrack or from a
 * sample buffer). If no player can be created, 2 channels are assumed.
 *
 * url: location of the media file.
 *
 * Returns the receiver, also when no reader or no audio track is available.
 * In that case the reader and/or track output remain nil, hasAudio remains
 * NO and, if the reader could not be created, lastError holds the reason.
 */
- (id) initWithURL: (NSURL *) url {
    self = [super init];
    if (self == nil) {
        return nil;
    }
    // hardcode / initialize the requested bitsPerSample value
    bitsPerSample = 16;

    if (url == nil) {
        // nothing can be loaded without a url; leave the extractor in its
        // "no reader" state instead of handing nil to AVFoundation
        NSLog(@"AudioExtractor_initWithURL: the url is nil");
        return self;
    }

    mediaAsset = [AVURLAsset URLAssetWithURL:url options:@{ AVURLAssetPreferPreciseDurationAndTimingKey : @YES }];

    NSError *initError = nil;
    assetReader = [AVAssetReader assetReaderWithAsset:mediaAsset error:&initError];

    // Cocoa convention: the return value signals failure, not the error pointer
    if (assetReader == nil) {
        NSLog(@"AudioExtractor_initWithURL: AssetReader is NULL. %@", [initError localizedDescription]);
        [self setLastError: initError];
        return self;
    }

    // use AVAudioPlayer to detect the number of channels, does not work for mpg files
    numberOfChannels = 2; // default, used when detection fails
    NSError *auPlayerError = nil;
    AVAudioPlayer *auPlayer = [[AVAudioPlayer alloc] initWithContentsOfURL:url error:&auPlayerError];
    if (auPlayer != nil && [auPlayer numberOfChannels] > 0) {
        numberOfChannels = (int) [auPlayer numberOfChannels];
        if (AVFDebug) {
            NSLog(@"AudioExtractor_initWithURL: created an AVAudioPlayer, #channels: %d", numberOfChannels);
        }
    } else if (AVFDebug) {
        NSLog(@"AudioExtractor_initWithURL: unable to create an AVAudioPlayer");
    }
    // the player is only needed for the channel count
    auPlayer = nil;

    NSArray<AVAssetTrack *> *audioTracks = [mediaAsset tracksWithMediaType: AVMediaTypeAudio];
    if ([audioTracks count] == 0) {
        if (AVFDebug) {
            NSLog(@"AudioExtractor_initWithURL: the file has no audio tracks");
        }
        return self;
    }

    if (AVFDebug) {
        NSLog(@"AudioExtractor_initWithURL: the file has %lu audio track(s)", (unsigned long) [audioTracks count]);
    }
    CMTime cmDuration = [mediaAsset duration];
    mediaDurationSeconds = CMTimeGetSeconds(cmDuration);
    mediaDurationMs = 1000 * mediaDurationSeconds;
    NSLog(@"AudioExtractor_initWithURL: media duration seconds %f", mediaDurationSeconds);

    hasAudio = YES;
    audioTrack = [audioTracks firstObject];
    [self detectTrackProperties:audioTrack];

    // decoder configuration settings; LPCM is required, the number of channels
    // may have been detected by the AudioPlayer (a property that doesn't seem
    // to be detectable from a sample buffer), bit depth just has to be set
    NSDictionary<NSString *, id> *outSettings = @{
        AVFormatIDKey : @(kAudioFormatLinearPCM),
        AVNumberOfChannelsKey : @(numberOfChannels),
        AVLinearPCMBitDepthKey : @(bitsPerSample),
        AVLinearPCMIsFloatKey : @(NO)
    };

    trackOutput = [AVAssetReaderTrackOutput assetReaderTrackOutputWithTrack:audioTrack outputSettings:outSettings];

    // addOutput: raises an exception for an output the reader cannot accept,
    // so ask first and fall back to the "no track output" state
    if (![assetReader canAddOutput:trackOutput]) {
        NSLog(@"AudioExtractor_initWithURL: the track output cannot be added to the reader");
        trackOutput = nil;
        return self;
    }
    [assetReader addOutput:trackOutput];
    // random access is what allows repeated resetForReadingTimeRanges: calls
    // later on; it has to be enabled before reading starts
    [trackOutput setSupportsRandomAccess:YES];
    if (AVFDebug) {
        NSLog(@"AudioExtractor_initWithURL: track output supports random access: %d", [trackOutput supportsRandomAccess]);
    }

    [self detectSampleProperties];

    return self;
}

/*
 * To be called only once for a track, tries to detect a few properties, some are
 * essential for handling and interpreting the retrieved bytes, e.g. the sample
 * frequency (taken from the natural time scale of the track).
 * Most properties are only logged, the extended set only in debug mode.
 *
 * track: the audio track to inspect; nil is ignored.
 */
- (void) detectTrackProperties: (AVAssetTrack *) track {
    if (track == nil) {
        return;
    }

    NSLog(@"AudioExtractor_detectTrackProperties:");
    NSLog(@"\tmedia type: %@", [track mediaType]);
    NSLog(@"\ttotal sample data length: %lld", [track totalSampleDataLength]);// sometimes 0
    CMTimeScale timeScale = [track naturalTimeScale];
    NSLog(@"\tnatural time scale: %d", timeScale);// the sample frequency!
    sampleFreq = timeScale;

    if (!AVFDebug) {
        return;
    }

    NSLog(@"\tcan provide sample cursors: %d", [track canProvideSampleCursors]);//yes
    NSLog(@"\tis self contained: %d", [track isSelfContained]);// yes
    NSLog(@"\tnominal frame rate: %f", [track nominalFrameRate]);
    NSLog(@"\testimated data rate: %f", [track estimatedDataRate]);
    NSLog(@"\thas audio samples dependencies: %d", [track hasAudioSampleDependencies]);// no

    for (id cmd in [track formatDescriptions]) {
        CMFormatDescriptionRef cmdRef = (__bridge CMFormatDescriptionRef) cmd;
        // the media subtype is a four-character code packed in 32 bits; unpack
        // it byte by byte (printing it with %c would show a single byte only)
        FourCharCode subType = CMFormatDescriptionGetMediaSubType(cmdRef);
        char fourCC[5] = {
            (char) ((subType >> 24) & 0xFF),
            (char) ((subType >> 16) & 0xFF),
            (char) ((subType >> 8) & 0xFF),
            (char) (subType & 0xFF),
            '\0'
        };
        // the hex value is added because a code may contain non-printable bytes
        NSLog(@"\tformat description: '%s' (0x%08x)", fourCC, (unsigned int) subType);
    }

    CMTime minFrDur = [track minFrameDuration];// 0 / 0
    NSLog(@"\tminimal frame duration: %f (%lld / %d)", (minFrDur.value / (double)minFrDur.timescale), minFrDur.value, minFrDur.timescale);
}

/*
 * To be called only once, tries to detect some properties of the sample buffers.
 * It starts the reader on a small fragment at the beginning of the media file
 * (at most 0.5 seconds) and initializes a few sample and sample buffer related
 * members of this class from the first buffer (the size of that buffer, the
 * duration of a single sample, the duration of the buffer etc.).
 * The remaining buffers of the fragment are read and discarded.
 * The reader doesn't seem to read beyond the specified end time but rather
 * produces a smaller sample buffer at the end. In between buffers can be of different
 * sizes too, probably depending on the (compressed) chunks of the source.
 */
- (void) detectSampleProperties {
    if (assetReader == nil || trackOutput == nil) {
        if (AVFDebug) {
            NSLog(@"AudioExtractor_detectSampleProperties: no reader or track output");
        }
        return;
    }

    double sDur = mediaDurationSeconds < 0.5 ? mediaDurationSeconds : 0.5;
    [assetReader setTimeRange:CMTimeRangeFromTimeToTime(CMTimeMakeWithSeconds(0, 1000), CMTimeMakeWithSeconds(sDur, 1000))];

    if (![assetReader startReading]) {
        // without a started reader there is nothing to read from the output
        NSLog(@"AudioExtractor_detectSampleProperties: the reader is not ready for reading, cannot detect properties");
        return;
    }

    int count = 0;
    // test the reader status in a loop
    while (assetReader.status != AVAssetReaderStatusCompleted) {
        CMSampleBufferRef sampleRef = [trackOutput copyNextSampleBuffer];
        if (sampleRef == NULL) {
            // no more buffers for the requested fragment
            break;
        }

        // initialize some members once, from the first buffer only
        if (count == 0) {
            CMTime bufferDuration = CMSampleBufferGetDuration(sampleRef);
            CMItemCount numSamples = CMSampleBufferGetNumSamples(sampleRef);
            size_t totalSampleSize = CMSampleBufferGetTotalSampleSize(sampleRef);
            size_t sampleSize = CMSampleBufferGetSampleSize(sampleRef, 0);
            CFTypeID bufferType = CMSampleBufferGetTypeID();
            double bufferSeconds = bufferDuration.value / (double) bufferDuration.timescale;

            CMSampleTimingInfo timingInfoOut;
            OSStatus timingStatus = CMSampleBufferGetSampleTimingInfo(sampleRef, 0, &timingInfoOut);
            BOOL hasTiming = (timingStatus == noErr);

            // keep the frequency detected from the track if the buffer
            // duration carries no usable timescale
            if (bufferDuration.timescale > 0) {
                sampleFreq = bufferDuration.timescale;
            }
            if (hasTiming) {
                sampleDurationSeconds = CMTimeGetSeconds(timingInfoOut.duration);
            } else if (sampleFreq > 0) {
                // the timing info is unavailable (and the struct unset);
                // a single sample lasts 1 sec / sample frequency
                sampleDurationSeconds = 1.0 / sampleFreq;
            }
            sampleDurationMs = 1000 * sampleDurationSeconds;
            durationSecPerSampleBuffer = bufferSeconds;
            bytesPerSampleBuffer = (int) totalSampleSize;

            // output
            NSLog(@"AudioExtractor_SampleProperties: buffer type: %lu", (unsigned long) bufferType);
            NSLog(@"\tsample duration %f: ", sampleDurationSeconds);// 1 sec / sample frequency
            if (AVFDebug && hasTiming) {
                // time of the first sample
                NSLog(@"\tsample presentation time %f: ", CMTimeGetSeconds(timingInfoOut.presentationTimeStamp));
            }
            // == num samples * sample duration, the buffer duration CMTime has
            // the number of samples in the buffer as the value and the sample
            // frequency as the timescale (e.g. 44100 or 48000)
            NSLog(@"\tsample buffer duration %f (%lld / %d)", bufferSeconds, bufferDuration.value, bufferDuration.timescale);
            NSLog(@"\tnumber of samples %ld", (long) numSamples);// number of samples in the buffer, can be different per iteration
            NSLog(@"\ttotal sample size %zu", totalSampleSize);// sample size * num samples, in bytes?
            NSLog(@"\tbuffer sample size %zu", sampleSize);// the size of a single (complete?) sample, in bytes?

            // Notes from earlier experiments (code removed, it did not provide
            // much information):
            // - the data length of the CMBlockBuffer was the same as the total
            //   sample size
            // - the CMFormatDescription reported media type 'soun' and media
            //   subtype 'lpcm'; its extensions dictionary was inspected as well
            // - the sample size array and the output sample timing info array
            //   both needed 1 entry
            // - still no way was found to get the number of audio channels and
            //   the bits per sample from the sample buffer
        }

        CMSampleBufferInvalidate(sampleRef);
        CFRelease(sampleRef);
        count++;
    }

    if (AVFDebug) {
        NSLog(@"AudioExtractor_detectSampleProperties: number of iterations %d", count);
    }
    // do not call [assetReader cancelReading] here: a cancelled reader can not be restarted
}

/*
 * Sets the read position of the reader. Empty implementation, it only
 * reports whether there is a reader at all.
 * This is not very useful for this framework since a range has to be
 * provided to the reader and/or track output, not just a starting point
 * (e.g. a range from seekTime to seekTime + 1.0 seconds).
 *
 * seekTime: the requested position in seconds, currently unused.
 *
 * Returns TRUE if a reader exists, FALSE otherwise.
 */
- (BOOL) setPositionSec : (double) seekTime {
    return assetReader != nil;
}

/*
 * Copies the decoded samples of an interval into a buffer of the caller.
 *
 * Note:
 * [AVAssetReader startReading] cannot be called again after reading has already
 * started (it is started once in detectSampleProperties). Repeatedly seeking a
 * random position seems possible through the AVAssetReaderTrackOutput, by
 * resetting its time range. Such a reset is only accepted after the previous
 * range has been read to its end, therefore this method always reads until
 * the output has no more buffers, also when the destination is already full.
 *
 * It seems the presentation time of the first buffer (almost) exactly matches
 * the requested time, so there is no need to skip bytes at the start of the
 * buffer in the first iteration.
 *
 * fromTime:       start of the interval in seconds.
 * rangeDuration:  duration of the interval in seconds.
 * destBuffer:     destination for the bytes (e.g. the address of a Java
 *                 DirectBuffer of sufficient size).
 * destBufferSize: capacity of destBuffer in bytes.
 *
 * Returns the number of bytes copied to destBuffer; 0 if there is no reader,
 * no track output or no destination, or if the range could not be set.
 */
- (int) getSamplesFromTime: (double) fromTime duration: (double) rangeDuration bufferAddress: (char *) destBuffer bufferLength: (size_t) destBufferSize {
    if (assetReader == nil || trackOutput == nil) {
        return 0;
    }
    if (destBuffer == NULL || destBufferSize == 0) {
        // nothing can be copied; don't touch the state of the track output
        return 0;
    }

    // the result is reported as an int, never copy more than can be reported
    size_t capacity = destBufferSize > (size_t) INT_MAX ? (size_t) INT_MAX : destBufferSize;

    CMTime cmFromTime = CMTimeMakeWithSeconds(fromTime, 1000);
    CMTime cmDuration = CMTimeMakeWithSeconds(rangeDuration, 1000);
    NSValue *rangeVal = [NSValue valueWithCMTimeRange:CMTimeRangeMake(cmFromTime, cmDuration)];
    @try {
        // raises an exception e.g. for an invalid range or when the previous
        // range has not been read completely
        [trackOutput resetForReadingTimeRanges:@[ rangeVal ]];
    } @catch (NSException *nse) {
        NSLog(@"AudioExtractor_getSamplesFromTimes: error while setting the range %@", [nse reason]);
        return 0;
    }

    // reading is not cancelled, so [assetReader startReading] does not need to
    // be (and cannot be) called again

    size_t numCopied = 0;
    // test the reader status in a loop
    while (assetReader.status != AVAssetReaderStatusCompleted) {
        CMSampleBufferRef sampleRef = [trackOutput copyNextSampleBuffer];
        if (sampleRef == NULL) {
            // the end of the requested range (or a failure of the reader)
            break;
        }

        // once the destination is full the remaining buffers are only read
        // and discarded, so that the next range reset is accepted
        if (numCopied < capacity) {
            CMBlockBufferRef blockBuffer = CMSampleBufferGetDataBuffer(sampleRef);
            if (blockBuffer != NULL) {
                // the number of bytes that can be copied depends on the remaining
                // space in the destination and the available bytes in the source
                size_t available = CMBlockBufferGetDataLength(blockBuffer);
                size_t remaining = capacity - numCopied;
                size_t copySize = available < remaining ? available : remaining;

                if (copySize > 0) {
                    // copies the bytes also when the block buffer is not
                    // stored in one contiguous memory area
                    OSStatus copyStatus = CMBlockBufferCopyDataBytes(blockBuffer, 0, copySize, destBuffer + numCopied);
                    if (copyStatus == kCMBlockBufferNoErr) {
                        numCopied += copySize;
                    } else {
                        NSLog(@"AudioExtractor_getSamplesFromTime: unable to copy the bytes of a sample buffer, status %d", (int) copyStatus);
                    }
                }
            }
        }

        // release every buffer that was handed out by copyNextSampleBuffer
        CMSampleBufferInvalidate(sampleRef);
        CFRelease(sampleRef);
    }

    return (int) numCopied;
}

/*
 * Cancels reading from the reader and removes the references to the reader,
 * the track output, the asset and the audio track.
 */
- (void) releaseReader {
    // messaging nil is a no-op, so no check for an existing reader is needed
    [assetReader cancelReading];

    assetReader = nil;
    trackOutput = nil;
    mediaAsset  = nil;
    audioTrack  = nil;
}

/*
 * Enables or disables debug mode (additional log output) for all instances.
 *
 * mode: YES to enable, NO to disable.
 */
+ (void) setDebugMode: (BOOL) mode {
    AVFDebug = mode;
}

/*
 * Returns the current debug state.
 */
+ (BOOL) isDebugMode {
    return AVFDebug;
}


@end
