OC底层 - 类的加载（2） | magic-geng 移动端探索

OC底层 - 类的加载（2）

· 2020-10-28 · # object-c底层探索

通过对objc_init的分析以及map_images->_read_images->readClass这几个核心函数的初识之后，我们可以清晰的知道从dyld的启动加载->objc_init->_dyld_objc_notify_register，到把类从macho里面加载到内存的过程。但是类的属性、方法、协议等是怎么读取到rw和ro里面呢？带着还未解决的问题，我们继续来分析类的加载。

二、methodizeClass

我们通过上面的探索，已经知道了类的信息从macho中读取，并且设置了ro->rw->rwe。
但是比如我们进行方法慢速查找的过程中，是通过二分查找，而二分查找是需要先进行方法排序的。那么方法排序是怎么排的呢？下面就继续探索methodizeClass函数。

static void methodizeClass(Class cls, Class previously)
{
    runtimeLock.assertLocked();

    bool isMeta = cls->isMetaClass();
    auto rw = cls->data();
    auto ro = rw->ro();
    auto rwe = rw->ext();

这段代码是将rw、ro、rwe分别保存到临时变量中。

接下来：

 // Install methods and properties that the class implements itself.
    method_list_t *list = ro->baseMethods();
    if (list) {
        //处理方法
        prepareMethodLists(cls, &list, 1, YES, isBundleClass(cls), nullptr);
        if (rwe) rwe->methods.attachLists(&list, 1);
    }

    property_list_t *proplist = ro->baseProperties;
    if (rwe && proplist) {
        //处理属性
        rwe->properties.attachLists(&proplist, 1);
    }

    protocol_list_t *protolist = ro->baseProtocols;
    if (rwe && protolist) {
        //处理协议
        rwe->protocols.attachLists(&protolist, 1);
    }

这里是处理方法、属性、协议。
我们来看prepareMethodLists函数：

// Prepares the `addedCount` method lists in `addedLists` for class `cls`:
// any list that has not been fixed up yet is passed to fixupMethodList with
// sorting requested. The caller must hold runtimeLock (asserted below).
//   baseMethods       - when true, only the class's custom AWZ/RR/Core flags are asserted
//   methodsFromBundle - forwarded unchanged to fixupMethodList
//   why               - forwarded to setDisallowPreoptCachesRecursively
static void 
prepareMethodLists(Class cls, method_list_t **addedLists, int addedCount,
                   bool baseMethods, bool methodsFromBundle, const char *why)
{
    runtimeLock.assertLocked();

    // Nothing was added, so there is nothing to prepare.
    if (addedCount == 0) return;
    if (baseMethods) {
        ASSERT(cls->hasCustomAWZ() && cls->hasCustomRR() && cls->hasCustomCore());
    } else if (cls->cache.isConstantOptimizedCache()) {
        cls->setDisallowPreoptCachesRecursively(why);
    } else if (cls->allowsPreoptInlinedSels()) {
        // ... (omitted in this excerpt)
    }

    for (int i = 0; i < addedCount; i++) {
        method_list_t *mlist = addedLists[i];
        ASSERT(mlist);

        // Fixup selectors if necessary
        if (!mlist->isFixedUp()) {
            fixupMethodList(mlist, methodsFromBundle, true/*sort*/);
        }
    }

    // For a class that is already initialized, run the AWZ/RR/Core scanners
    // over the newly added lists.
    if (cls->isInitialized()) {
        objc::AWZScanner::scanAddedMethodLists(cls, addedLists, addedCount);
        objc::RRScanner::scanAddedMethodLists(cls, addedLists, addedCount);
        objc::CoreScanner::scanAddedMethodLists(cls, addedLists, addedCount);
    }
}

其中核心函数fixupMethodList

// Fixes up one method list: registers (uniques) its selectors if that has not
// been done, optionally sorts the entries, and marks the list as fixed up.
// The caller must hold runtimeLock and the list must not be fixed up yet
// (both asserted below).
//   bundleCopy - forwarded to sel_registerNameNoLock as its `copy` argument
//   sort       - when true, big-format lists are sorted with SortBySELAddress
static void 
fixupMethodList(method_list_t *mlist, bool bundleCopy, bool sort)
{
    runtimeLock.assertLocked();
    ASSERT(!mlist->isFixedUp());

    // fixme lock less in attachMethodLists ?
    // dyld3 may have already uniqued, but not sorted, the list
    if (!mlist->isUniqued()) {
        // selLock is held for the whole loop because the NoLock registration
        // variant is used below.
        mutex_locker_t lock(selLock);
    
        // Unique selectors in list.
        for (auto& meth : *mlist) {
            const char *name = sel_cname(meth.name());
            meth.setName(sel_registerNameNoLock(name, bundleCopy));
        }
    }

    // Sort only when requested, and only for lists that are not small lists
    // and whose entry size equals method_t::bigSize. std::stable_sort keeps
    // the relative order of entries that compare equal.
    if (sort && !mlist->isSmallList() && mlist->entsize() == method_t::bigSize) {
        method_t::SortBySELAddress sorter;
        std::stable_sort(&mlist->begin()->big(), &mlist->end()->big(), sorter);
    }
    
    // Small lists are never marked as fixed up here.
    if (!mlist->isSmallList()) {
        mlist->setFixedUp();
    }
}

以上代码就是对方法进行排序，那么怎么排序的呢，我们打印一下执行过std::stable_sort之后的mlist：

可以看到，是通过sel的地址升序排序的。
那么sel是怎么来的呢？

// Registers `name` as a selector by calling __sel_registerName with
// shouldLock = 0; `copy` is forwarded unchanged.
SEL sel_registerNameNoLock(const char *name, bool copy) {
    return __sel_registerName(name, 0, copy);  // NO lock, maybe copy
}
// Returns the SEL for `name`: first from search_builtins, otherwise from the
// namedSelectors table, inserting a new entry when the name is not present.
//   shouldLock - when true selLock must NOT be held on entry; when false it
//                must already be held (both asserted below)
//   copy       - forwarded to sel_alloc when a new entry is created
// Returns 0 when `name` is null.
static SEL __sel_registerName(const char *name, bool shouldLock, bool copy) 
{
    SEL result = 0;

    if (shouldLock) selLock.assertUnlocked();
    else selLock.assertLocked();

    if (!name) return (SEL)0;

    // Try the builtin lookup before touching the table.
    result = search_builtins(name);
    if (result) return result;
    
    // NOTE(review): presumably acquires selLock only when shouldLock is true - confirm.
    conditional_mutex_locker_t lock(selLock, shouldLock);
	auto it = namedSelectors.get().insert(name);
	// it.second is true when the insert added a new entry.
	if (it.second) {
		// No match. Insert.
		*it.first = (const char *)sel_alloc(name, copy);
	}
	return (SEL)*it.first;
}

通过sel_registerNameNoLock -> __sel_registerName 可以看到。
（1）有一个sel_alloc进行了开辟，所以sel是有内存地址的。
（2）其中namedSelectors这张表是在_read_images中通过sel_init创建的。
（3）search_builtins方法：

// Looks up `name` through _dyld_get_objc_selector when SUPPORT_PREOPT is
// enabled. Returns nil when that lookup finds nothing or SUPPORT_PREOPT is off.
static SEL search_builtins(const char *name) 
{
#if SUPPORT_PREOPT
  if (SEL result = (SEL)_dyld_get_objc_selector(name))
    return result;
#endif
    return nil;
}
// Looks up `selName`, first in the gObjCOpt selector table and then, when
// gUseDyld3 is set, through dyld3::_dyld_get_objc_selector.
// Returns nullptr when neither lookup yields a result.
const char* _dyld_get_objc_selector(const char* selName)
{
    // Check the shared cache table if it exists.
    if ( gObjCOpt != nullptr ) {
        if ( const objc_opt::objc_selopt_t* selopt = gObjCOpt->selopt() ) {
            const char* name = selopt->get(selName);
            if (name != nullptr)
                return name;
        }
    }

    // Not found above: defer to the dyld3 implementation when it is in use.
    if ( gUseDyld3 )
        return dyld3::_dyld_get_objc_selector(selName);

    return nullptr;
}

因为一些系统的库，比如UIKit，它的sel需要从共享缓存中拿，所以会跳转到dyld中去根据name拿对应的sel。如果是dyld3，那么继续走dyld3中的_dyld_get_objc_selector：

// dyld3 variant: logs the API call and delegates to gAllImages.getObjCSelector.
const char* _dyld_get_objc_selector(const char* selName)
{
    log_apis("dyld_get_objc_selector()\n");
    return gAllImages.getObjCSelector(selName);
}

// Looks up `selName` in _objcSelectorHashTable, passing the array from
// _objcSelectorHashTableImages. Returns nullptr when the table pointer is null.
const char* AllImages::getObjCSelector(const char *selName) const {
    if ( _objcSelectorHashTable == nullptr )
        return nullptr;
    return _objcSelectorHashTable->getString(selName, _objcSelectorHashTableImages.array());
}

// Returns the address of the string equal to `selName`, or nullptr when
// there is no exact match.
//   baseAddresses - indexed by the image index of the looked-up target; the
//                   selected entry is used as the base for the string address
const char* ObjCStringTable::getString(const char* selName, const Array<uintptr_t>& baseAddresses) const {
    StringTarget target = getPotentialTarget(selName);
    if (target == sentinelTarget)
        return nullptr;

    // Reinterpret the raw target as an (imageIndex, imageOffset) pair.
    dyld3::closure::Image::ObjCImageOffset imageAndOffset;
    imageAndOffset.raw = target;

    uintptr_t sectionBaseAddress = baseAddresses[imageAndOffset.imageIndex];

    const char* value = (const char*)(sectionBaseAddress + imageAndOffset.imageOffset);
    // The target is only a candidate; accept it only if the strings match exactly.
    if (!strcmp(selName, value))
        return value;
    return nullptr;
}

从getString函数中可以看到，会返回地址。

注意：SortBySELAddress只比较sel的地址，并不比较imp；std::stable_sort是稳定排序，所以如果同一个方法列表里有同名方法（比如分类的同名方法），它们在排序后仍保持原来的相对顺序。

再回到methodizeClass函数中，执行完方法排序之后，如果rwe有值，再对rwe进行处理。

三、懒加载类和非懒加载类

我们回到调用realizeClassWithoutSwift的地方，可以看到代码注释：Realize non-lazy classes (for +load methods and static instances(暂不关注))

实现一个非懒加载类。在类中添加+(void)load方法之后，就会从懒加载类变成非懒加载类。

因为+(void)load方法是在load_images的时候就调用了，如果此时还没有去加载、实现这个类，那么怎么能够调用+(void)load方法呢？所以如果类写了+(void)load方法，它就会提前去加载类的信息。

我们知道了非懒加载的情况，那么懒加载类是怎么去加载信息的呢？
我们在realizeClassWithoutSwift(Class cls, Class previously)打断点，然后bt查看堆栈

可以看到：lookUpImpOrForward->realizeAndInitializeIfNeeded_locked->realizeClassMaybeSwiftAndLeaveLocked->realizeClassMaybeSwiftMaybeRelock

证明懒加载类是在第一次消息发送的时候去加载信息的。

那为什么要区分懒加载和非懒加载呢？
因为加载类信息的时候，会处理很多的方法，创建很多的临时变量，还要进行排序等，比较耗时，如果都放在main函数之前来做，会增加启动的时间。

小结：

懒加载类的情况：

lookUpImpOrForward - > realizeClassMaybeSwiftMaybeRelock -> realizeClassWithoutSwift -> methodizeClass

非懒加载类的情况：

readClass->_getObjc2NonlazyClassList -> remapClass -> realizeClassWithoutSwift -> methodizeClass

四、分类的本质

先定义一个MyClassTest的分类

// Protocol adopted by the category below; declares one instance method.
@protocol TestProtocol <NSObject>

- (void)testProtocol;

@end

// Category "Test" on MyClassTest. It adopts TestProtocol and declares two
// properties, three instance methods and one class method.
@interface MyClassTest (Test) <TestProtocol>

@property (nonatomic, copy) NSString *cate_name;
@property (nonatomic, assign) int cate_age;

- (void)cate_instanceMethod1;
- (void)cate_instanceMethod2;
- (void)cate_instanceMethod3;
+ (void)cate_classMethod;

@end

// Category implementation. Only the four cate_* methods are implemented here;
// each one logs its own function name. No property accessors and no
// testProtocol implementation appear in this block.
@implementation MyClassTest (Test)

- (void)cate_instanceMethod1 {
    NSLog(@"%s",__FUNCTION__);
}

- (void)cate_instanceMethod2 {
    NSLog(@"%s",__FUNCTION__);
}

- (void)cate_instanceMethod3 {
    NSLog(@"%s",__FUNCTION__);
}

+ (void)cate_classMethod {
    NSLog(@"%s",__FUNCTION__);
}

@end

通过clang -rewrite-objc MyClassTest-Test.m -o MyClassTest-Test.cpp
编译成c++

// Category: methods -> attachtoclass
// Category record as it appears in the clang -rewrite-objc output.
// It holds method, protocol and property lists; there is no ivar list field.
struct _category_t {
	const char *name;   
	struct _class_t *cls;  
	const struct _method_list_t *instance_methods;  // instance method list
	const struct _method_list_t *class_methods;  // class method list
	const struct _protocol_list_t *protocols;  // protocol list
	const struct _prop_list_t *properties;  // property list
};
// The _category_t record for the category, placed in the __DATA,__objc_const
// section. Initializers follow the field order of struct _category_t:
// name, cls, instance_methods, class_methods, protocols, properties.
static struct _category_t _OBJC_$_CATEGORY_MyClassTest_$_Test __attribute__ ((used, section ("__DATA,__objc_const"))) = 
{
	"MyClassTest",
	0, // &OBJC_CLASS_$_MyClassTest,
	(const struct _method_list_t *)&_OBJC_$_CATEGORY_INSTANCE_METHODS_MyClassTest_$_Test,
	(const struct _method_list_t *)&_OBJC_$_CATEGORY_CLASS_METHODS_MyClassTest_$_Test,
	(const struct _protocol_list_t *)&_OBJC_CATEGORY_PROTOCOLS_$_MyClassTest_$_Test,
	(const struct _prop_list_t *)&_OBJC_$_PROP_LIST_MyClassTest_$_Test,
};

可以看到category_t的结构。

这里先说一个经典的面试题：为什么分类里面不能直接添加属性呢？

是因为在分类里声明属性时，编译器不会自动生成对应的get和set方法，也不会生成成员变量（从上面的_category_t结构也能看到，其中没有成员变量列表）。

五、classProperties

// Category record as declared in the objc runtime source. Unlike the clang
// rewrite output, properties are split into instanceProperties and
// _classProperties.
struct category_t {
    const char *name;
    classref_t cls;
    WrappedPtr<method_list_t, PtrauthStrip> instanceMethods;
    WrappedPtr<method_list_t, PtrauthStrip> classMethods;
    struct protocol_list_t *protocols;
    struct property_list_t *instanceProperties;
    // Fields below this point are not always present on disk.
    struct property_list_t *_classProperties;

    // Returns classMethods when isMeta is true, otherwise instanceMethods.
    method_list_t *methodsForMeta(bool isMeta) {
        if (isMeta) return classMethods;
        else return instanceMethods;
    }

    // Declared here only; the definition is not part of this excerpt.
    property_list_t *propertiesForMeta(bool isMeta, struct header_info *hi);
    
    // Returns nullptr when isMeta is true, otherwise the protocol list.
    protocol_list_t *protocolsForMeta(bool isMeta) {
        if (isMeta) return nullptr;
        else return protocols;
    }
};

通过clang编译之后，我们再看objc的源码，可以看到category_t的结构将properties分成了instanceProperties 和 _classProperties。
instanceProperties我们很好理解，就是对象属性。但是_classProperties是什么呢？

Objective-C Class Properties 早在 WWDC 2016 中就已经公示，给 Objective-C 加入这个特性主要是为了与 Swift 类型属性相互操作。

// Sample class with one instance property (mgName) and one class property
// (version, declared with the `class` attribute).
@interface MyClassTest : NSObject

@property (nonatomic, copy) NSString *mgName;
@property (class, nonatomic) NSString *version;
- (void)testInstance;
+ (void)classInstance;

@end
@implementation MyClassTest
// Static variable that backs the `version` class property.
static NSString* _version = @"0.1.1";

// Hand-written class-level getter for `version`; returns the static above.
+ (NSString *)version {
    return _version;
}
//...
@end

比如我定义一个类属性，在内部定义一个静态变量，然后再定义一个类的get方法 + (NSString *)version。那么在外部就可以通过类属性读取version。
对应的Swift类型属性：

/// Swift counterpart of the Objective-C sample: a read-only computed type
/// property `version` that returns a fixed string.
class SwiftClassText: NSObject {
    class var version: String {
        return "0.1.1"
    }
}

这么做的好处就是，可以方便的读取类的信息，但是需要自己写get/set方法。
看一些文章还说可以用它来做组件化解耦合，这里我没有去尝试过，未来可以试试看。