Subversion Repositories SmartDukaan

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
13566 amit.gupta 1
'''
Created on Jan 15, 2015

@author: amit
'''
6
from pymongo.mongo_client import MongoClient
7
import importlib
8
import mechanize
9
# Maps a numeric source id to the name of its module under ``dtr.sources``.
sourceMap = {1: "amazon", 2: "flipkart", 3: "snapdeal"}


def getStore(source_id):
    """Return the store object for ``source_id``.

    The id is looked up in ``sourceMap`` to get a module name, the module
    ``dtr.sources.<name>`` is imported, and its ``Store`` attribute is
    called with ``source_id``.

    Args:
        source_id: a key of ``sourceMap``.

    Returns:
        Whatever ``dtr.sources.<name>.Store(source_id)`` returns.

    Raises:
        KeyError: if ``source_id`` is not a key of ``sourceMap``.
        ImportError: if the source module cannot be imported.
        AttributeError: if the module has no ``Store`` attribute.
    """
    # Resolve the name first so an unknown id fails before any import happens.
    module_name = "dtr.sources." + sourceMap[source_id]
    module = importlib.import_module(module_name)
    # The previous version also built a base-class Store here and threw it
    # away immediately; only the source-specific object is needed.
    return getattr(module, "Store")(source_id)
17
 
18
class ScrapeException(Exception):
    """Exception carrying an expression and a message.

    Presumably raised when scraping a store fails -- the name suggests so,
    but no raise site is visible in this file.

    Attributes:
        expr -- input expression in which the error occurred
        msg  -- explanation of the error
    """

    def __init__(self, expr, msg):
        # Explicit base initialisation; ``args`` ends up as (expr, msg),
        # exactly as it was when only the attributes were assigned.
        Exception.__init__(self, expr, msg)
        self.expr, self.msg = expr, msg
29
 
30
class ParseException(Exception):
    """Exception carrying an expression and a message.

    Presumably raised when parsing a fetched page fails -- the name suggests
    so, but no raise site is visible in this file.

    Attributes:
        expr -- input expression in which the error occurred
        msg  -- explanation of the error
    """

    def __init__(self, expr, msg):
        # Explicit base initialisation; ``args`` ends up as (expr, msg),
        # exactly as it was when only the attributes were assigned.
        Exception.__init__(self, expr, msg)
        self.expr, self.msg = expr, msg
41
 
42
class Store(object):
    """Base class for store objects.

    It records the store id and declares the operations a store is expected
    to offer. Every operation except ``parseOrderRawHtml`` raises
    ``NotImplementedError`` here; ``parseOrderRawHtml`` does nothing and
    returns ``None``.
    """

    # Order status labels.
    ORDER_PLACED = 'Order Placed'
    ORDER_DELIVERED = 'Delivered'
    ORDER_SHIPPED = 'Shipped'  # Let's see if we can make use of it
    ORDER_CANCELLED = 'Cancelled'

    def __init__(self, store_id):
        """Remember ``store_id`` on the instance."""
        self.store_id = store_id

    def getName(self):
        """Not implemented in the base class; presumably returns the store name."""
        raise NotImplementedError()

    def scrapeAffiliate(self, startDate=None, endDate=None):
        """Not implemented in the base class.

        ``startDate`` / ``endDate`` presumably bound the period to scrape --
        confirm against the concrete stores.
        """
        raise NotImplementedError()

    def parseOrderPage(self, htmlString=None):
        """Not implemented in the base class."""
        raise NotImplementedError()

    def saveToAffiliate(self, offers):
        """Not implemented in the base class."""
        raise NotImplementedError()

    def scrapeStoreOrders(self):
        """Not implemented in the base class."""
        raise NotImplementedError()

    def parseOrderRawHtml(self, orderId, subtagId, userId, rawHtml, orderSuccessUrl):
        """Parse the order for a specific store (no-op in the base class).

        Author's notes on what a store-specific parser should extract:

        order id, total amount, created on (now() if it could not be parsed),
        suborder id, title, quantity, unit price, expected delivery date,
        status (default would be Order placed).

        Once products are identified, each suborder can then be updated
        with its respective cashback.

        Possible fields to display for not-yet-delivered orders are
        Product/Quantity/Amount/Store/CashbackAmount/OrderDate/
        ExpectedDelivery/OrderStatus/DetailedStatus/CashbackStatus.
        No need to show cancelled orders.
        CashbackStatus - Pending/Approved/Cancelled/CreditedToWallet
        OrderStatus - Placed/Cancelled/Delivered
        """
        return None
87
 
88
if __name__ == '__main__':
    # Manual run: load the store registered under source id 3 ("snapdeal"
    # in sourceMap) and call its scrapeAffiliate() with default arguments.
    selected_store = getStore(3)
    selected_store.scrapeAffiliate()
91
 
92
 
93
def getBrowserObject():
    """Build and return a configured ``mechanize.Browser``.

    The browser gets a fresh ``LWPCookieJar`` (no file name is given to it),
    has equiv, redirect and referer handling switched on, robots.txt
    handling and all debug output switched off, a refresh processor
    installed with ``max_time=1``, and a fixed set of request headers with
    a desktop Chrome user agent.

    Returns:
        The configured ``mechanize.Browser`` instance.
    """
    # ``cookielib`` was renamed to ``http.cookiejar`` in Python 3. Try the
    # Python 2 name first so behaviour there is unchanged.
    try:
        import cookielib
    except ImportError:
        import http.cookiejar as cookielib

    br = mechanize.Browser(factory=mechanize.RobustFactory())
    cj = cookielib.LWPCookieJar()
    br.set_cookiejar(cj)

    # Handlers that are switched on.
    br.set_handle_equiv(True)
    br.set_handle_redirect(True)
    br.set_handle_referer(True)

    # robots.txt handling and debug output are switched off.
    br.set_handle_robots(False)
    br.set_debug_http(False)
    br.set_debug_redirects(False)
    br.set_debug_responses(False)

    br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)

    # NOTE(review): Accept-Encoding advertises gzip/deflate/sdch, but nothing
    # here enables decoding -- confirm callers decode compressed bodies.
    request_headers = [
        ('User-agent', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11'),
        ('Accept', 'text/html,application/xhtml+xml,application/json,application/xml;q=0.9,*/*;q=0.8'),
        ('Accept-Encoding', 'gzip,deflate,sdch'),
        ('Accept-Language', 'en-US,en;q=0.8'),
        ('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.3'),
    ]
    br.addheaders = request_headers
    return br