# Subversion Repositories SmartDukaan
#
# Rev
#
# Blame | Last modification | View Log | RSS feed

'''
Created on Jan 15, 2015

@author: amit
'''
from pymongo.mongo_client import MongoClient
import importlib
import mechanize
# Maps a numeric source id to the name of the module under ``dtr.sources``
# that getStore() imports for that source.
sourceMap = {1:"amazon", 2:"flipkart", 3:"snapdeal"}

def getStore(source_id):
    """Return the store object for ``source_id``.

    The module ``dtr.sources.<name>`` (``<name>`` is looked up in
    ``sourceMap``) is imported on demand and its ``Store`` attribute is
    called with ``source_id``; the result of that call is returned.

    Raises:
        KeyError: ``source_id`` is not a key of ``sourceMap``.
        ImportError: the mapped module cannot be imported.
        AttributeError: the imported module has no ``Store`` attribute.
    """
    module = importlib.import_module("dtr.sources." + sourceMap[source_id])
    return getattr(module, "Store")(source_id)

class ScrapeException(Exception):
    """Exception carrying an expression and an explanatory message.

    NOTE(review): going by the name, this is meant for failures while
    scraping; no raise site is visible in this module -- confirm at callers.

    Attributes:
        expr -- first constructor argument (the offending input/expression)
        msg  -- second constructor argument (explanation of the error)
    """

    def __init__(self, expr, msg):
        # Forward both values to Exception so that ``args`` is always
        # ``(expr, msg)``, including when the caller passes them by keyword.
        # Without this, keyword construction leaves ``args`` empty, which
        # makes ``str()`` blank and breaks pickling of the instance.
        super(ScrapeException, self).__init__(expr, msg)
        self.expr = expr
        self.msg = msg

class ParseException(Exception):
    """Exception carrying an expression and an explanatory message.

    NOTE(review): going by the name, this is meant for failures while
    parsing; no raise site is visible in this module -- confirm at callers.

    Attributes:
        expr -- first constructor argument (the offending input/expression)
        msg  -- second constructor argument (explanation of the error)
    """

    def __init__(self, expr, msg):
        # Forward both values to Exception so that ``args`` is always
        # ``(expr, msg)``, including when the caller passes them by keyword.
        # Without this, keyword construction leaves ``args`` empty, which
        # makes ``str()`` blank and breaks pickling of the instance.
        super(ParseException, self).__init__(expr, msg)
        self.expr = expr
        self.msg = msg

class Store(object):
    """Base class for a store.

    Holds the store id and the order-status constants. Every method except
    ``__init__`` and ``parseOrderRawHtml`` raises ``NotImplementedError``
    here; ``parseOrderRawHtml`` does nothing and returns ``None``.
    """

    # Order status values.
    ORDER_PLACED = 'Order Placed'
    ORDER_DELIVERED = 'Delivered'
    ORDER_SHIPPED = 'Shipped'  # Original note: let's see if we can make use of it
    ORDER_CANCELLED = 'Cancelled'

    def __init__(self, store_id):
        """Remember ``store_id`` on the instance as ``self.store_id``."""
        self.store_id = store_id

    def getName(self):
        """Not implemented in the base class (presumably returns the store name)."""
        raise NotImplementedError

    def scrapeAffiliate(self, startDate=None, endDate=None):
        """Not implemented in the base class.

        NOTE(review): presumably scrapes affiliate data between
        ``startDate`` and ``endDate`` -- confirm against an implementation.
        """
        raise NotImplementedError

    def parseOrderPage(self, htmlString=None):
        """Not implemented in the base class (presumably parses an order page's HTML)."""
        raise NotImplementedError

    def saveToAffiliate(self, offers):
        """Not implemented in the base class (presumably persists ``offers``)."""
        raise NotImplementedError

    def scrapeStoreOrders(self):
        """Not implemented in the base class (presumably scrapes the store's orders)."""
        raise NotImplementedError

    def parseOrderRawHtml(self, orderId, subtagId, userId, rawHtml, orderSuccessUrl):
        """Parse the order for a specific store.

        The base implementation does nothing and returns ``None``.

        Fields to extract: order id, total amount, created on (now() if it
        could not be parsed), suborder id, title, quantity, unit price,
        expected delivery date, status (default would be Order placed).

        Once products are identified, each suborder can then be updated
        with its respective cashback.

        Possible fields to display for not-yet-delivered orders are
        Product/Quantity/Amount/Store/CashbackAmount/OrderDate/
        ExpectedDelivery/OrderStatus/DetailedStatus/CashbackStatus.
        No need to show cancelled orders.

        CashbackStatus - Pending/Approved/Cancelled/CreditedToWallet
        OrderStatus - Placed/Cancelled/Delivered
        """
        return None
    
def getBrowserObject():
    """Build and return a configured ``mechanize.Browser``.

    The browser gets a fresh ``LWPCookieJar``, handles http-equiv, redirects
    and the Referer header, ignores robots.txt, has all debug output turned
    off, handles Refresh headers with ``max_time=1`` and sends a fixed set
    of Chrome-like request headers.
    """
    # The cookie jar module is ``cookielib`` on Python 2 and
    # ``http.cookiejar`` on Python 3; support both.
    try:
        import cookielib
    except ImportError:
        import http.cookiejar as cookielib

    br = mechanize.Browser(factory=mechanize.RobustFactory())
    br.set_cookiejar(cookielib.LWPCookieJar())

    br.set_handle_equiv(True)
    br.set_handle_redirect(True)
    br.set_handle_referer(True)
    br.set_handle_robots(False)

    br.set_debug_http(False)
    br.set_debug_redirects(False)
    br.set_debug_responses(False)

    # The first argument is only an on/off flag, so pass True rather than a
    # throw-away handler instance taken from the private ``mechanize._http``.
    br.set_handle_refresh(True, max_time=1)

    # NOTE(review): 'Accept-Encoding' advertises gzip/deflate/sdch, yet no
    # decompression is configured here -- confirm callers cope with
    # compressed response bodies.
    br.addheaders = [
        ('User-agent', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11'),
        ('Accept', 'text/html,application/xhtml+xml,application/json,application/xml;q=0.9,*/*;q=0.8'),
        ('Accept-Encoding', 'gzip,deflate,sdch'),
        ('Accept-Language', 'en-US,en;q=0.8'),
        ('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.3'),
    ]
    return br


# Script entry point. Kept at the very end so that every definition in this
# module exists before the scrape starts.
if __name__ == '__main__':
    store = getStore(3)
    store.scrapeAffiliate()