//
//  CharacterFrequency.m
//  FCIMDatabaseBuilder
//
//  Created by Andrew Choi on 19/08/08.
//  Copyright 2008 Andrew Choi. All rights reserved.
//

/*
 
 Permission for the use of this code is granted only for research, educational, and non-commercial purposes.
 
 Redistribution of this code or its parts in source, binary, and any other form without permission, with or without modification, is prohibited.  Modifications include, but are not limited to, translation to other programming languages and reuse of tables, constant definitions, and API's defined in it.
 
 Andrew Choi is not liable for any losses or damages caused by the use of this software.
 
 */

#import "CharacterFrequency.h"

@implementation CharacterFrequency

// Initializes the receiver from the frequency database text file at `filename`.
//
// The file is decoded as Mac Traditional Chinese.  Everything up to and
// including the third dashed separator line is skipped.  For each remaining
// row the scanner:
//   1. skips a leading integer, if there is one (presumably a rank column --
//      TODO confirm against the data file),
//   2. reads the next run of non-whitespace characters as the dictionary key,
//   3. reads the following integer as that key's frequency,
//   4. ignores the rest of the line.
// A row is stored only when both the key and the frequency were actually
// scanned; malformed rows are dropped instead of inserting uninitialized
// values.
//
// If the file cannot be read or decoded, the error is logged and the receiver
// is returned with an empty table, so every lookup then yields frequency 0.
- (CharacterFrequency *)initWithFile:(NSString *)filename
{
	self = [super init];
	
	if (self)
	{
		dictionary = [[NSMutableDictionary alloc] initWithCapacity:0];
		
		NSError *readError = nil;
		NSString *frequencyDatabase = [NSString stringWithContentsOfFile:filename encoding:CFStringConvertEncodingToNSStringEncoding(kCFStringEncodingMacChineseTrad) error:&readError];
		
		// Never hand a nil string to NSScanner; keep the (empty) table instead.
		if (frequencyDatabase == nil)
		{
			NSLog(@"CharacterFrequency: unable to read frequency database %@: %@", filename, readError);
			return self;
		}
		
		NSScanner *scanner = [NSScanner scannerWithString:frequencyDatabase];
		
		// The data rows start after the third separator line.
		NSString *separator = @"-------------------------------------------";
		for (int i = 0; i < 3; i++)
		{
			[scanner scanUpToString:separator intoString:NULL];
			[scanner scanString:separator intoString:NULL];
		}
		
		// Loop-invariant: a key is any run of characters that are not whitespace.
		NSCharacterSet *keyCharacters = [[NSCharacterSet whitespaceCharacterSet] invertedSet];
		
		while (![scanner isAtEnd])
		{
			NSString *unicode = nil;
			int frequency = 0;
			
			[scanner scanInt:NULL];
			BOOL scannedKey = [scanner scanCharactersFromSet:keyCharacters intoString:&unicode];
			BOOL scannedFrequency = [scanner scanInt:&frequency];
			
			// Discard whatever else is on this line.
			[scanner scanUpToString:@"\n" intoString:NULL];
			[scanner scanString:@"\n" intoString:NULL];
			
			//NSLog(@"%@ - %d", unicode, frequency);
			if (scannedKey && scannedFrequency)
				[dictionary setObject:[NSNumber numberWithInt:frequency] forKey:unicode];
		}
	}
	
	return self;
}

// Releases the frequency table owned by the receiver, then lets the
// superclass finish deallocation.
- (void)dealloc
{
	// Manual reference counting: balances the alloc/init done in -initWithFile:.
	[dictionary release];
	[super dealloc];
}

// Convenience constructor: builds a CharacterFrequency from the database file
// at `filename` and returns it autoreleased, so the caller does not own it.
+ (CharacterFrequency *)characterFrequencyWithFile:(NSString *)filename
{
	CharacterFrequency *table = [CharacterFrequency alloc];
	table = [table initWithFile:filename];
	
	return [table autorelease];
}

// Accessor for the frequency table (key string -> NSNumber frequency).
// The receiver's own table object is returned, not a copy.
- (NSDictionary *)dictionary
{
	NSDictionary *table = dictionary;
	
	return table;
}

// Orders two characters by descending frequency: the result is
// NSOrderedAscending when c1 is MORE frequent than c2, so that sorting with
// this comparison puts frequent characters first.  A character that is not in
// the table counts as frequency 0 (the lookup yields nil, whose intValue is 0).
- (NSComparisonResult)compareFrequencyOfChar:(NSString *)c1 withChar:(NSString *)c2
{
	NSNumber *firstCount = [dictionary objectForKey:c1];
	NSNumber *secondCount = [dictionary objectForKey:c2];
	int lhs = [firstCount intValue];
	int rhs = [secondCount intValue];
	
	if (lhs == rhs)
		return NSOrderedSame;
	
	// Higher frequency sorts earlier.
	return (lhs > rhs) ? NSOrderedAscending : NSOrderedDescending;
}

// Returns the approximate frequency of `phrase` according to `table`: the
// product of the table frequencies of each length-1 substring of the phrase.
// A substring missing from the table contributes 0; an empty (or nil) phrase
// yields 1.0.  The product is accumulated in double precision because a
// single-precision float loses exactness beyond 24 bits and overflows to
// infinity after only a few large factors, which made distinct phrases
// compare as equal.
static double CharacterFrequencyProductForPhrase(NSDictionary *table, NSString *phrase)
{
	double product = 1.0;
	NSUInteger length = [phrase length];
	
	for (NSUInteger i = 0; i < length; i++)
	{
		NSString *character = [phrase substringWithRange:NSMakeRange(i, 1)];
		product *= (double) [[table objectForKey:character] intValue];
	}
	
	return product;
}

// Frequency of a phrase is *approximated* by the product of the frequencies of all its characters.
// Orders two phrases by descending approximate frequency: the result is
// NSOrderedAscending when p1 is MORE frequent than p2.
- (NSComparisonResult)compareFrequencyOfPhrase:(NSString *)p1 withPhrase:(NSString *)p2
{
	double freq1 = CharacterFrequencyProductForPhrase(dictionary, p1);
	double freq2 = CharacterFrequencyProductForPhrase(dictionary, p2);
	
	// Make more frequent phrases appear earlier.
	if (freq1 > freq2)
		return NSOrderedAscending;
	else if (freq1 < freq2)
		return NSOrderedDescending;
	else
		return NSOrderedSame;
}

@end
