Subversion Repositories SmartDukaan

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5827 amar.kumar 1
/*
2
 *	Copyright 2005 stat4j.org
3
 *
4
 *   Licensed under the Apache License, Version 2.0 (the "License");
5
 *   you may not use this file except in compliance with the License.
6
 *	You may obtain a copy of the License at
7
 *
8
 *       http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 *   Unless required by applicable law or agreed to in writing, software
11
 *   distributed under the License is distributed on an "AS IS" BASIS,
12
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 *   See the License for the specific language governing permissions and
14
 *   limitations under the License.
15
 */
16
 
17
 package net.sourceforge.stat4j.filter;
18
 
19
import java.text.DecimalFormat;
20
import java.util.HashMap;
21
import java.util.Map;
22
import java.util.regex.Matcher;
23
import java.util.regex.Pattern;
24
 
25
import net.sourceforge.stat4j.util.Util;
26
 
27
/**
28
 * Name:		RegExpScraper.java
29
 * Date:		Sep 4, 2004
30
 * Description:
31
 * 
32
 * Class that implements basic scraping logic given a scap pattern 
33
 * conforming to standard regular expressions.
34
 * 
35
 * For example: 	
36
 * 	
37
 * pattern:		
38
 * text:
39
 * value:
40
 * 
41
 * @author Lara D'Abreo
42
 */
43
 
44
public class RegExpScraper  {
45
 
46
	protected String decimalFormat = null;
47
	// A Cache of precompiled scrap patterns, this is
48
	// loaded on demand
49
	protected Map patterns;
50
	// As Formatters are not threadsafe we use ThreadLocal
51
	// to guaranttee one (and only on instance per Thread).
52
	protected ThreadLocal format = new ThreadLocal() {
53
		protected synchronized Object initialValue() {
54
			if (decimalFormat == null)
55
				return new DecimalFormat();
56
			else
57
				return new DecimalFormat(decimalFormat);
58
		}
59
	};
60
 
61
 
62
	public RegExpScraper() {
63
		patterns = new HashMap();
64
		decimalFormat = Util.getValue("decimalformat");
65
	}
66
 
67
 
68
	public Double scrapUserDefinedValue(String text, String pattern) {
69
			System.out.println(text);
70
			System.out.println(pattern);
71
		// if the text is empty or null just return no reading
72
		if ((text == null)
73
			|| (text.length() == 0)
74
			|| (pattern == null)
75
			|| (pattern.length() == 0)) {
76
			return null;
77
		}
78
		// get pre-compiled pattern, compile one if it isnt available
79
		Pattern p  = getPattern(pattern);
80
 
81
		// Create a matcher
82
		Matcher m = p.matcher(text);
83
 
84
		try {
85
			if (!m.matches())
86
				return null;
87
 
88
			// Get first match group
89
			String str = m.group(1);
90
			System.out.println(str);
91
			// Use the formater to parse out the value . The formatter will
92
			// take account of the country specific formatting of the number
93
			// whereas Double.parse will not.
94
			// Any extraneous text after the number (if it is present) will be ignored
95
			// by the formatter. This means we can be lazy with what the str ends with
96
			DecimalFormat df = (DecimalFormat) format.get();
97
			Number value = df.parse(str);
98
			return new Double(value.doubleValue());
99
		} catch (Exception ex) {
100
			return null;
101
		}
102
 
103
	}
104
 
105
	public final Pattern getPattern(String regexp) {
106
 
107
		Pattern pattern = (Pattern) patterns.get(regexp);
108
		if (pattern == null) {
109
			pattern = Pattern.compile(regexp);
110
			patterns.put(regexp, pattern);
111
		}
112
		return pattern;
113
 
114
	}
115
 
116
	public void dispose() {
117
		patterns.clear();
118
		format = null;
119
	}
120
 
121
 
122
	public static void main(String[] args) {
123
		try {
124
			RegExpScraper scraper = new RegExpScraper();
125
 
126
			String pattern = ".*post=([0-9].*)+";
127
			String log = "Time to process post=1.0";
128
 
129
			Double value = scraper.scrapUserDefinedValue(log,pattern);
130
 
131
			System.out.println("Scrape Value=" + value);
132
		}catch (Exception e) {
133
			e.printStackTrace();
134
		}
135
 
136
	}
137
 
138
 
139
 
140
}