Monday, December 23, 2013

Google Drive API - Rename duplicate files / folders

Here is the case:

You create an empty document, type something, and then close the window without naming the file. The file stays in your Google Drive with the name "Untitled document". You repeat that a few more times, and you end up with many duplicate files (or folders) in your Google Drive, all with the same name.

...

Someday you will (I will) need to reorganize all the documents and folders in your Drive account. The first thing to do is to rename all the duplicate files or folders by modified date. The newest file keeps the name (e.g. "My document"), and the older files get a number after the name ("My document (1)", "My document (2)", ...).

I wrote the following snippet to do the renaming task and make the world a lot easier:


from dateutil import parser


def rename_file(service, file_id, new_title):
    """Rename a file.

    Written so that it parses on both Python 2.6+ and Python 3
    (``except ... as`` and single-argument ``print(...)``).

    NOTE(review): ``BadStatusLine`` and ``errors`` are not imported in the
    visible snippet; they must be provided by the surrounding module
    (presumably ``httplib``/``http.client`` and the Google API client's
    ``errors`` module -- confirm).

    Args:
    service: Drive API service instance.
    file_id: ID of the file to rename.
    new_title: New title for the file.
    Returns:
    Whatever the patch request's ``execute()`` returns if successful,
    None if one of the two handled errors occurred (the error is printed).
    """
    # Only the title is sent in the patch body; the request is also made
    # with fields='title'.
    body = {'title': new_title}

    try:
        # Rename the file.
        return service.files().patch(
            fileId=file_id,
            body=body,
            fields='title').execute()
    except BadStatusLine as badstatus:
        print('Error when renaming file: %s' % badstatus)
    except errors.HttpError as error:
        print('Rename file error: %s' % error)

    # Reached only after one of the handled errors above.
    return None


def get_own_files(service):
    """Fetch the files matching the "owned by me and not trashed" query.

    The lookup itself is delegated to ``search_files`` (defined elsewhere);
    this function only builds the query, prints how many results came back
    and returns them unchanged.

    Args:
    service: Drive API service instance, passed straight to search_files.
    Returns:
    The result of search_files (must support ``len()``).
    """
    query = "'me' in owners and trashed =false"
    files = search_files(service, query)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("\nGot %s files\n" % len(files))
    return files


def get_unique_file_name_list(files):
    """Build the list of distinct (mimeType, first parent id, title) records.

    Files whose ``parents`` list is empty are ignored. Each remaining file is
    reduced to a small dict with the keys ``mimeType``, ``parentid`` (the id
    of its first parent) and ``title``; a record is kept only the first time
    it is seen, so the result preserves first-appearance order.

    Args:
    files: iterable of file metadata dicts with ``mimeType``, ``parents``
           and ``title`` keys.
    Returns:
    List of unique record dicts.
    """
    distinct = []
    for entry in files:
        parents = entry['parents']
        if not parents:
            # No parent folder to compare against: skip this file.
            continue
        record = {
            'mimeType': entry['mimeType'],
            'parentid': parents[0]['id'],
            'title': entry['title'],
        }
        if record in distinct:
            continue
        distinct.append(record)
    return distinct


# dup_files_dict = { datetime_obj1: file1, datetime_obj2: file2,}
def rename_dup_files_by_modified_date(service, dup_files_dict):
    """Append " (N)" to the title of every file except the newest one.

    The dict keys are sorted in descending order; the file under the largest
    key keeps its title, and the file at position N (1-based, after the
    first) is renamed to "<title> (N)" through rename_file. The outcome of
    each rename is printed.

    Args:
    service: Drive API service instance, passed to rename_file.
    dup_files_dict: mapping of sortable keys (e.g. modified datetimes) to
                    file metadata dicts carrying ``id`` and ``title``.
    Returns:
    The ``id`` of the file under the largest key (the one left untouched),
    or None when dup_files_dict is empty.
    """
    if not dup_files_dict:
        # Nothing to rename and no "newest" file to report.
        return None

    # sorted() works on Python 2 and 3 alike; dict.keys().sort() does not
    # exist on Python 3 because keys() returns a view.
    newest_first = sorted(dup_files_dict, reverse=True)

    for position, key in enumerate(newest_first):
        if position == 0:
            # The newest file keeps its original title.
            continue
        old_title = dup_files_dict[key]['title']
        print("Renaming file %s" % old_title)
        new_title = "%s (%d)" % (old_title, position)
        updated_file = rename_file(service, dup_files_dict[key]['id'], new_title)
        if updated_file:
            print("File %s has been renamed to %s" % (old_title, new_title))
        else:
            print("Fail to rename file %s" % old_title)

    return dup_files_dict[newest_first[0]]['id']


# rename duplicate files of a single user
def rename_all_dup_files(service):
    """Rename every group of same-named files owned by the current user.

    Files are considered duplicates when they share the same mimeType, the
    same first parent id and the same title. Each group with more than one
    member is handed to rename_dup_files_by_modified_date, which sorts the
    group's keys newest-first and numbers the older files.

    Args:
    service: Drive API service instance.
    Returns:
    None.
    """
    files = get_own_files(service)
    if not files:
        return

    filename_list = get_unique_file_name_list(files)
    if len(filename_list) >= len(files):
        # As many distinct names as files: nothing to rename.
        return

    for fn in filename_list:
        dup_files_dict = {}
        for item in files:
            if (item['mimeType'] == fn['mimeType']
                    and item['parents']
                    and item['parents'][0]['id'] == fn['parentid']
                    and item['title'] == fn['title']):
                modified = parser.parse(item['modifiedDate'])
                # Key on (modified date, file id) rather than the date alone:
                # two files with an identical modifiedDate would otherwise
                # overwrite each other and one would never be renamed. The
                # keys are only sorted by the rename helper, so tuples keep
                # the newest-first order with the id as a tie-breaker.
                dup_files_dict[(modified, item['id'])] = item
        if len(dup_files_dict) > 1:
            rename_dup_files_by_modified_date(service, dup_files_dict)



For more information about using Google Drive API, please read my previous blog posts about it: http://iambusychangingtheworld.blogspot.com/

The code is also available in the Google Drive Migration project's GitHub repository: https://github.com/dangtrinh/gdm